2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2004 Christian Costa
7 * Copyright 2005 Oliver Stieber
8 * Copyright 2006 Ivan Gyurdiev
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "wined3d_private.h"
/* Selects d3d_shader as the default Wine debug channel for this file. */
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
/* Expands to the gl_info of the adapter behind This->baseShader.device, so it
 * can only be used where a local `This` with a baseShader member is in scope. */
34 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
36 /* Shader debugging - Change the following line to enable debugging of software
38 #if 0 /* Must not be 1 in cvs version */
/* Tracing helpers: VSTRACE forwards a parenthesised argument list to TRACE and
 * TRACE_VSVECTOR prints the x, y, z and w members of a vector. The second,
 * empty TRACE_VSVECTOR definition is the variant that compiles to nothing.
 * NOTE(review): the #else/#endif lines of this conditional are not visible in
 * this excerpt - confirm which definitions belong to which branch. */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
43 # define TRACE_VSVECTOR(name)
47 * DirectX9 SDK download
48 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
51 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
53 * Using Vertex Shaders
54 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
57 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
60 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
66 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
69 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
71 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72 * http://developer.nvidia.com/view.asp?IO=vstovp
74 * NVIDIA: Memory Management with VAR
75 * http://developer.nvidia.com/view.asp?IO=var_memory_management
78 /* TODO: Vertex and pixel shaders are almost identical; the only differences are the way some of the data is looked up and the availability of some of the data, i.e. some instructions are only valid for pixel shaders and some only for vertex shaders.
79 Because of this, the bulk of the software pipeline can be shared between pixel and vertex shaders, and it would not be surprising if the programs could be cross-compiled using a large body of shared code. */
/* Marker placed in the third column of the opcode table below for some
 * instructions (defb, defi, breakp, setp). It is the pointer value 1 cast to
 * a string pointer, not a real string, so it must never be dereferenced. */
81 #define GLNAME_REQUIRE_GLSL ((const char *)1)
/* Vertex shader opcode table, terminated by the all-zero/NULL entry.
 * Each initializer lists, in order: the WINED3DSIO_* opcode, its lower-case
 * name, a second name string (NULL, "undefined" or GLNAME_REQUIRE_GLSL on
 * some rows), two small integers, two handler functions (either may be NULL)
 * and two version values (0, WINED3DVS_VERSION(major, minor) or -1).
 * SINCOS appears twice with different version ranges and handlers arguments.
 * NOTE(review): presumably the integers are "writes a destination" and the
 * parameter count, the handlers are the ARB and GLSL code generators, and the
 * versions are the minimum and maximum shader version - confirm against the
 * SHADER_OPCODE declaration, which is not part of this excerpt. */
83 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
84 /* This table is not order or position dependent. */
87 {WINED3DSIO_NOP, "nop", "NOP", 0, 0, vshader_hw_map2gl, NULL, 0, 0},
88 {WINED3DSIO_MOV, "mov", "MOV", 1, 2, vshader_hw_map2gl, shader_glsl_mov, 0, 0},
89 {WINED3DSIO_MOVA, "mova", NULL, 1, 2, vshader_hw_map2gl, shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
90 {WINED3DSIO_ADD, "add", "ADD", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
91 {WINED3DSIO_SUB, "sub", "SUB", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
92 {WINED3DSIO_MAD, "mad", "MAD", 1, 4, vshader_hw_map2gl, shader_glsl_mad, 0, 0},
93 {WINED3DSIO_MUL, "mul", "MUL", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
94 {WINED3DSIO_RCP, "rcp", "RCP", 1, 2, vshader_hw_rsq_rcp, shader_glsl_rcp, 0, 0},
95 {WINED3DSIO_RSQ, "rsq", "RSQ", 1, 2, vshader_hw_rsq_rcp, shader_glsl_rsq, 0, 0},
96 {WINED3DSIO_DP3, "dp3", "DP3", 1, 3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
97 {WINED3DSIO_DP4, "dp4", "DP4", 1, 3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
98 {WINED3DSIO_MIN, "min", "MIN", 1, 3, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
99 {WINED3DSIO_MAX, "max", "MAX", 1, 3, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
100 {WINED3DSIO_SLT, "slt", "SLT", 1, 3, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
101 {WINED3DSIO_SGE, "sge", "SGE", 1, 3, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
102 {WINED3DSIO_ABS, "abs", "ABS", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
103 {WINED3DSIO_EXP, "exp", "EX2", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
104 {WINED3DSIO_LOG, "log", "LG2", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
105 {WINED3DSIO_EXPP, "expp", "EXP", 1, 2, vshader_hw_map2gl, shader_glsl_expp, 0, 0},
106 {WINED3DSIO_LOGP, "logp", "LOG", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
107 {WINED3DSIO_LIT, "lit", "LIT", 1, 2, vshader_hw_map2gl, shader_glsl_lit, 0, 0},
108 {WINED3DSIO_DST, "dst", "DST", 1, 3, vshader_hw_map2gl, shader_glsl_dst, 0, 0},
109 {WINED3DSIO_LRP, "lrp", "LRP", 1, 4, NULL, shader_glsl_lrp, 0, 0},
110 {WINED3DSIO_FRC, "frc", "FRC", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
111 {WINED3DSIO_POW, "pow", "POW", 1, 3, vshader_hw_map2gl, shader_glsl_pow, 0, 0},
112 {WINED3DSIO_CRS, "crs", "XPD", 1, 3, vshader_hw_map2gl, shader_glsl_cross, 0, 0},
113 /* TODO: sgn can possibly be performed as
116 {WINED3DSIO_SGN, "sgn", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
117 {WINED3DSIO_NRM, "nrm", NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
118 {WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
119 {WINED3DSIO_SINCOS, "sincos", "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
121 {WINED3DSIO_M4x4, "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
122 {WINED3DSIO_M4x3, "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
123 {WINED3DSIO_M3x4, "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
124 {WINED3DSIO_M3x3, "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
125 {WINED3DSIO_M3x2, "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
126 /* Declare registers */
127 {WINED3DSIO_DCL, "dcl", NULL, 0, 2, NULL, NULL, 0, 0},
128 /* Constant definitions */
129 {WINED3DSIO_DEF, "def", NULL, 1, 5, NULL, NULL, 0, 0},
130 {WINED3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
131 {WINED3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
132 /* Flow control - requires GLSL or software shaders */
133 {WINED3DSIO_REP , "rep", NULL, 0, 1, NULL, shader_glsl_rep, WINED3DVS_VERSION(2,0), -1},
134 {WINED3DSIO_ENDREP, "endrep", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
135 {WINED3DSIO_IF, "if", NULL, 0, 1, NULL, shader_glsl_if, WINED3DVS_VERSION(2,0), -1},
136 {WINED3DSIO_IFC, "ifc", NULL, 0, 2, NULL, shader_glsl_ifc, WINED3DVS_VERSION(2,1), -1},
137 {WINED3DSIO_ELSE, "else", NULL, 0, 0, NULL, shader_glsl_else, WINED3DVS_VERSION(2,0), -1},
138 {WINED3DSIO_ENDIF, "endif", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
139 {WINED3DSIO_BREAK, "break", NULL, 0, 0, NULL, shader_glsl_break, WINED3DVS_VERSION(2,1), -1},
140 {WINED3DSIO_BREAKC, "breakc", NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
141 {WINED3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
142 {WINED3DSIO_CALL, "call", NULL, 0, 1, NULL, shader_glsl_call, WINED3DVS_VERSION(2,0), -1},
143 {WINED3DSIO_CALLNZ, "callnz", NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
144 {WINED3DSIO_LOOP, "loop", NULL, 0, 2, NULL, shader_glsl_loop, WINED3DVS_VERSION(2,0), -1},
145 {WINED3DSIO_RET, "ret", NULL, 0, 0, NULL, NULL, WINED3DVS_VERSION(2,0), -1},
146 {WINED3DSIO_ENDLOOP,"endloop", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
147 {WINED3DSIO_LABEL, "label", NULL, 0, 1, NULL, shader_glsl_label, WINED3DVS_VERSION(2,0), -1},
149 {WINED3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
150 {WINED3DSIO_TEXLDL, "texldl", NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
151 {0, NULL, NULL, 0, 0, NULL, NULL, 0, 0}
/* Fills This->baseShader.limits according to This->baseShader.hex_version.
 *
 * Set for every version: texcoord = 0, attributes = 16, packed_input = 0 and
 * constant_float = GL_LIMITS(vshader_constantsF).
 * Per version:
 *   vs_1_0 / vs_1_1: 12 temporaries, no bool/int constants, 1 address
 *                    register, no packed outputs, samplers or labels.
 *   vs_2_0 / vs_2_1: 12 temporaries, 16 bool and 16 int constants, 1 address
 *                    register, 16 labels, no packed outputs or samplers.
 *   vs_3_0:          32 temporaries, 32 bool and 32 int constants, 1 address
 *                    register, 12 packed outputs, 4 samplers, 16 labels.
 *   anything else:   the vs_2_x values, plus a FIXME naming the version.
 *
 * NOTE(review): no `break` statement is visible between the cases in this
 * excerpt. Confirm each case ends with one; otherwise every version falls
 * through to the default values and the FIXME. */
154 static void vshader_set_limits(
155 IWineD3DVertexShaderImpl *This) {
157 This->baseShader.limits.texcoord = 0;
158 This->baseShader.limits.attributes = 16;
159 This->baseShader.limits.packed_input = 0;
161 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
162 This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
164 switch (This->baseShader.hex_version) {
165 case WINED3DVS_VERSION(1,0):
166 case WINED3DVS_VERSION(1,1):
167 This->baseShader.limits.temporary = 12;
168 This->baseShader.limits.constant_bool = 0;
169 This->baseShader.limits.constant_int = 0;
170 This->baseShader.limits.address = 1;
171 This->baseShader.limits.packed_output = 0;
172 This->baseShader.limits.sampler = 0;
173 This->baseShader.limits.label = 0;
176 case WINED3DVS_VERSION(2,0):
177 case WINED3DVS_VERSION(2,1):
178 This->baseShader.limits.temporary = 12;
179 This->baseShader.limits.constant_bool = 16;
180 This->baseShader.limits.constant_int = 16;
181 This->baseShader.limits.address = 1;
182 This->baseShader.limits.packed_output = 0;
183 This->baseShader.limits.sampler = 0;
184 This->baseShader.limits.label = 16;
187 case WINED3DVS_VERSION(3,0):
188 This->baseShader.limits.temporary = 32;
189 This->baseShader.limits.constant_bool = 32;
190 This->baseShader.limits.constant_int = 32;
191 This->baseShader.limits.address = 1;
192 This->baseShader.limits.packed_output = 12;
193 This->baseShader.limits.sampler = 4;
194 This->baseShader.limits.label = 16; /* FIXME: 2048 */
/* Unknown versions get the vs_2_x limits and are reported through FIXME. */
197 default: This->baseShader.limits.temporary = 12;
198 This->baseShader.limits.constant_bool = 16;
199 This->baseShader.limits.constant_int = 16;
200 This->baseShader.limits.address = 1;
201 This->baseShader.limits.packed_output = 0;
202 This->baseShader.limits.sampler = 0;
203 This->baseShader.limits.label = 16;
204 FIXME("Unrecognized vertex shader version %#x\n",
205 This->baseShader.hex_version);
209 /* This is an internal function,
210 * used to create fake semantics for shaders
211 * that don't have them - d3d8 shaders where the declaration
212 * stores the register for each input
/* Writes a synthetic semantic into This->semantics_in[regnum].
 *
 * The usage token is bit 31 combined with `usage` and `usage_idx` shifted to
 * their DCL fields; the register token is bit 31 combined with the full write
 * mask, the INPUT register type and `regnum`.
 *
 * NOTE(review): the declaration of the `regnum` parameter is not visible in
 * this excerpt, and no range check on it is visible before it indexes
 * semantics_in - confirm callers only pass valid attribute registers.
 * NOTE(review): `0x1 << 31` shifts a signed int into its sign bit; an unsigned
 * constant (0x1u << 31) would avoid relying on that - confirm and fix upstream. */
214 static void vshader_set_input(
215 IWineD3DVertexShaderImpl* This,
217 BYTE usage, BYTE usage_idx) {
219 /* Fake usage: set reserved bit, usage, usage_idx */
220 DWORD usage_token = (0x1 << 31) |
221 (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
223 /* Fake register; set reserved bit, regnum, type: input, wmask: all */
224 DWORD reg_token = (0x1 << 31) |
225 WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
/* Both tokens go into the slot selected by the register number itself. */
227 This->semantics_in[regnum].usage = usage_token;
228 This->semantics_in[regnum].reg = reg_token;
/* Decides whether two (usage, usage index) pairs denote the same input.
 *
 * Pairs with different indices never match. With equal indices they match
 * when the usages are equal, or when one is WINED3DDECLUSAGE_POSITION and the
 * other WINED3DDECLUSAGE_POSITIONT (in either order).
 * NOTE(review): the final return for the non-matching case is not visible in
 * this excerpt - presumably FALSE; confirm. */
231 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
232 if (usage_idx1 != usage_idx2) return FALSE;
233 if (usage1 == usage2) return TRUE;
/* POSITION and POSITIONT are treated as interchangeable. */
234 if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
235 if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
/* Searches the shader's input semantics for a requested usage.
 *
 * Walks This->semantics_in[0 .. MAX_ATTRIBS-1], decodes usage and usage index
 * from each usage token, and tests non-empty tokens with match_usage()
 * against (usage_req, usage_idx_req).
 *
 * NOTE(review): the body of the match branch and the return statements are
 * not visible in this excerpt - presumably the matching index is stored in
 * *regnum and TRUE is returned, FALSE otherwise; confirm. */
240 BOOL vshader_get_input(
241 IWineD3DVertexShader* iface,
242 BYTE usage_req, BYTE usage_idx_req,
243 unsigned int* regnum) {
245 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
248 for (i = 0; i < MAX_ATTRIBS; i++) {
249 DWORD usage_token = This->semantics_in[i].usage;
250 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
251 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
/* A zero usage token marks a slot without a semantic and is skipped. */
253 if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
/* Looks up whether input register `regnum` is listed in the shader's
 * swizzled attribute array.
 *
 * Decodes usage and usage index from This->semantics_in[regnum].usage and
 * compares them with each of the first This->num_swizzled_attribs entries of
 * This->swizzled_attribs.
 *
 * NOTE(review): the return statements are not visible in this excerpt -
 * presumably TRUE on a match and FALSE otherwise; confirm.
 * NOTE(review): no range check on `regnum` is visible before it indexes
 * semantics_in. */
261 BOOL vshader_input_is_color(
262 IWineD3DVertexShader* iface,
263 unsigned int regnum) {
265 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
267 DWORD usage_token = This->semantics_in[regnum].usage;
268 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
269 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
273 for(i = 0; i < This->num_swizzled_attribs; i++) {
274 if(This->swizzled_attribs[i].usage == usage &&
275 This->swizzled_attribs[i].idx == usage_idx) {
/* Rebuilds This->swizzled_attribs / This->num_swizzled_attribs for the given
 * vertex declaration.
 *
 * Steps visible in this excerpt:
 *  1. Copy the current array to the local `oldswizzles` and zero the array.
 *  2. For each swizzled attribute of the declaration, scan the input
 *     registers flagged in reg_maps.attributes and record the (usage, idx)
 *     pairs that match.
 *  3. For each old entry, look for the same usage/index among the
 *     declaration's elements; old entries the declaration does not define are
 *     inserted back, shifting later entries up with memmove when the old
 *     entry compares greater (by usage, then idx) than the entry at j.
 *  4. Trace the resulting array and store the new count.
 *
 * NOTE(review): the declarations of `num`, `i`, `j` and the "found" flag, the
 * increments of `num` and the loop exits are not visible in this excerpt.
 * NOTE(review): no check is visible that `num` stays below the capacity of
 * swizzled_attribs while entries are appended - confirm. */
282 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
284 UINT numoldswizzles = This->num_swizzled_attribs;
285 IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
287 DWORD usage_token, usage, usage_idx;
/* Sized from the member array so the backup always has the same capacity. */
290 attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
292 /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
293 memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
295 memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
/* Keep only declaration swizzles that correspond to a flagged input register. */
297 for(i = 0; i < decl->num_swizzled_attribs; i++) {
298 for(j = 0; j < MAX_ATTRIBS; j++) {
300 if(!This->baseShader.reg_maps.attributes[j]) continue;
302 usage_token = This->semantics_in[j].usage;
303 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
304 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
306 if(decl->swizzled_attribs[i].usage == usage &&
307 decl->swizzled_attribs[i].idx == usage_idx) {
308 This->swizzled_attribs[num].usage = usage;
309 This->swizzled_attribs[num].idx = usage_idx;
315 /* Add previously converted attributes back in if they are not defined in the current declaration */
316 for(i = 0; i < numoldswizzles; i++) {
319 for(j = 0; j < decl->declarationWNumElements; j++) {
320 if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
321 oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
326 /* This previously converted attribute is declared in the current declaration. Either it is
327 * already in the new array, or it should not be there. Skip it
331 /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
332 * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
333 * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
334 * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
335 * stays unswizzled as well because it isn't found in the oldswizzles array
337 for(j = 0; j < num; j++) {
338 if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
339 oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
340 oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
341 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
342 sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
346 This->swizzled_attribs[j].usage = oldswizzles[i].usage;
347 This->swizzled_attribs[j].idx = oldswizzles[i].idx;
351 TRACE("New swizzled attributes array\n");
352 for(i = 0; i < num; i++) {
353 TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
354 This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
356 This->num_swizzled_attribs = num;
/* Builds the hardware program text for this vertex shader and hands it to GL.
 *
 * iface     - the vertex shader; its baseShader.shader_mode selects the path.
 * reg_maps  - register usage information passed on to the declaration and
 *             body generators.
 * pFunction - shader token stream passed to shader_generate_main().
 *
 * The swizzled attribute list is refreshed from the device's current vertex
 * declaration first. In SHADER_GLSL mode a shader object is created, filled
 * with declarations, body and position fixup code, compiled, and stored in
 * baseShader.prgId. In SHADER_ARB mode an "!!ARBvp1.0" program is assembled
 * and loaded with glProgramStringARB; when glGetError() reports
 * GL_INVALID_OPERATION the error position and string are logged and prgId is
 * set to -1. The text buffer is released at the end.
 *
 * NOTE(review): the pointer returned by HeapAlloc is used without a visible
 * NULL check. The #else/#endif lines, the remaining buffer field
 * initialisation, the declaration of errPos and several conditions (for
 * example the one guarding the fog copy) are not visible in this excerpt. */
358 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
359 or GLSL and send it to the card */
360 static VOID IWineD3DVertexShaderImpl_GenerateShader(
361 IWineD3DVertexShader *iface,
362 shader_reg_maps* reg_maps,
363 CONST DWORD *pFunction) {
365 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
366 IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
367 SHADER_BUFFER buffer;
/* Bring swizzled_attribs in line with the declaration before code is generated. */
369 find_swizzled_attribs(decl, This);
371 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
372 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
373 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
374 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
375 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
376 This->fixupVertexBufferSize = PGMSIZE;
377 This->fixupVertexBuffer[0] = 0;
379 buffer.buffer = This->device->fixupVertexBuffer;
/* Zero-filled private buffer of SHADER_PGMSIZE bytes for the program text. */
381 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
385 buffer.newline = TRUE;
387 if (This->baseShader.shader_mode == SHADER_GLSL) {
389 /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
390 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
392 /* Base Declarations */
393 shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
395 /* Base Shader Body */
396 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
398 /* Unpack 3.0 outputs */
399 if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0)) {
400 shader_addline(&buffer, "order_ps_input(OUT);\n");
402 shader_addline(&buffer, "order_ps_input();\n");
405 /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
407 shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
409 /* Write the final position.
411 * OpenGL coordinates specify the center of the pixel while d3d coords specify
412 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
413 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
414 * contains 1.0 to allow a mad.
416 shader_addline(&buffer, "gl_Position.xy = gl_Position.xy * posFixup.xy + posFixup.zw;\n");
418 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
420 * Basically we want(in homogenous coordinates) z = z * 2 - 1. However, shaders are run
421 * before the homogenous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
422 * which is the same as z = z * 2 - w.
424 shader_addline(&buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
426 shader_addline(&buffer, "}\n");
428 TRACE("Compiling shader object %u\n", shader_obj);
429 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
430 GL_EXTCALL(glCompileShaderARB(shader_obj));
431 print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
433 /* Store the shader object */
434 This->baseShader.prgId = shader_obj;
436 } else if (This->baseShader.shader_mode == SHADER_ARB) {
438 /* Create the hw ARB shader */
439 shader_addline(&buffer, "!!ARBvp1.0\n");
/* helper_const carries 2.0, -1.0 and the relative addressing offset as a float literal. */
440 shader_addline(&buffer, "PARAM helper_const = { 2.0, -1.0, %d.0, 0.0 };\n", This->rel_offset);
442 /* Mesa supports only 95 constants */
443 if (GL_VEND(MESA) || GL_VEND(WINE))
444 This->baseShader.limits.constant_float =
445 min(95, This->baseShader.limits.constant_float);
447 /* Some instructions need a temporary register. Add it if needed, but only if it is really needed */
448 if(reg_maps->usesnrm || This->rel_offset) {
449 shader_addline(&buffer, "TEMP TMP;\n");
452 /* Base Declarations */
453 shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
455 /* We need a constant to fixup the final position */
456 shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
458 /* Base Shader Body */
459 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
461 /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
463 shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
465 /* Write the final position.
467 * OpenGL coordinates specify the center of the pixel while d3d coords specify
468 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
469 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
470 * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
472 shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, posFixup.z;\n");
473 shader_addline(&buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, posFixup.w;\n");
475 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
476 * and the glsl equivalent
478 shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, helper_const.x, -TMP_OUT.w;\n");
480 shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
482 shader_addline(&buffer, "END\n");
484 /* TODO: change to resource.glObjectHandle or something like that */
485 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
487 TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
488 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
490 TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
491 /* Create the program and check for errors */
492 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
493 buffer.bsize, buffer.buffer));
/* Only GL_INVALID_OPERATION is treated as a failed program load here. */
495 if (glGetError() == GL_INVALID_OPERATION) {
497 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
498 FIXME("HW VertexShader Error at position %d: %s\n",
499 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
500 This->baseShader.prgId = -1;
504 #if 1 /* if were using the data buffer of device then we don't need to free it */
505 HeapFree(GetProcessHeap(), 0, buffer.buffer);
509 /* *******************************************
510 IWineD3DVertexShader IUnknown parts follow
511 ******************************************* */
/* IUnknown::QueryInterface for the vertex shader: forwards iface, riid and
 * ppobj unchanged to IWineD3DBaseShaderImpl_QueryInterface and returns its
 * result. */
512 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
513 return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
/* IUnknown::AddRef for the vertex shader: forwards to
 * IWineD3DBaseShaderImpl_AddRef and returns the value it reports. */
516 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
517 return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
/* IUnknown::Release for the vertex shader: forwards to
 * IWineD3DBaseShaderImpl_Release and returns the value it reports. */
520 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
521 return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
524 /* *******************************************
525 IWineD3DVertexShader IWineD3DVertexShader parts follow
526 ******************************************* */
/* Returns the shader's parent object through *parent.
 *
 * Stores This->parent in *parent, takes a reference on it with
 * IUnknown_AddRef and traces the result, so the caller receives a referenced
 * pointer.
 * NOTE(review): the return statement is not visible in this excerpt, and no
 * NULL check on `parent` or This->parent is visible before the AddRef. */
528 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
529 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
531 *parent = This->parent;
532 IUnknown_AddRef(*parent);
533 TRACE("(%p) : returning %p\n", This, *parent);
/* Returns the device that owns the shader through *pDevice.
 *
 * Takes a reference on This->baseShader.device with IWineD3DDevice_AddRef,
 * stores the pointer in *pDevice and traces it.
 * NOTE(review): the return statement is not visible in this excerpt. */
537 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
538 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
539 IWineD3DDevice_AddRef(This->baseShader.device);
540 *pDevice = This->baseShader.device;
541 TRACE("(%p) returning %p\n", This, *pDevice);
/* Copies the stored shader byte code to the caller, or reports its size.
 *
 * impl        - the vertex shader.
 * pData       - destination buffer.
 * pSizeOfData - in: size of pData; also written with
 *               baseShader.functionLength by the size query below.
 *
 * Returns WINED3DERR_INVALIDCALL when *pSizeOfData is smaller than
 * baseShader.functionLength. Otherwise functionLength bytes are copied from
 * baseShader.function to pData, or, when no function is stored, NULL is
 * written through pData.
 *
 * NOTE(review): the condition guarding the size assignment, the else branch
 * and the return statements are not visible in this excerpt - presumably the
 * assignment only runs for a NULL pData; confirm.
 * NOTE(review): the NULL store writes a pointer-sized value into the caller's
 * buffer regardless of *pSizeOfData, and the functionLength == 0 check has no
 * visible body - confirm both are intended. */
545 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
546 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
547 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
550 *pSizeOfData = This->baseShader.functionLength;
553 if (*pSizeOfData < This->baseShader.functionLength) {
554 /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
555 * than the required size we should write the required size and
556 * return D3DERR_MOREDATA. That's not actually true. */
557 return WINED3DERR_INVALIDCALL;
559 if (NULL == This->baseShader.function) { /* no function defined */
560 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
561 (*(DWORD **) pData) = NULL;
563 if(This->baseShader.functionLength == 0){
566 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
567 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
/* Attaches a shader token stream to the vertex shader object.
 *
 * Visible steps:
 *  1. shader_trace_init() on the tokens, then vshader_set_limits().
 *  2. Initialise the three immediate constant lists.
 *  3. Reset min/max relative offsets and reg_maps, then run
 *     shader_get_registers_used(); a result other than WINED3D_OK is returned
 *     to the caller unchanged.
 *  4. Record the device's selected vertex shader mode.
 *  5. In ARB mode with arb_vs_offset_limit set and min <= max, choose
 *     rel_offset: a span above 127 only logs FIXMEs, a span above 63 uses
 *     min + 63, a maximum above 63 uses min; rel_offset = 0 is also assigned
 *     (its guarding branch is not visible).
 *  6. load_local_constsF is set when relative float constant addressing is
 *     used and constantsF is not empty.
 *  7. A non-NULL pFunction is copied into a new heap block of
 *     baseShader.functionLength bytes (E_OUTOFMEMORY if that fails);
 *     otherwise baseShader.function is set to NULL.
 *
 * NOTE(review): the declarations of `hr` and `function`, several else
 * branches and the final return are not visible in this excerpt.
 * functionLength is presumably computed by shader_trace_init - confirm.
 * NOTE(review): no release of a previously stored baseShader.function is
 * visible before it is overwritten. */
572 /* Note that for vertex shaders CompileShader isn't called until the
573 * shader is first used. The reason for this is that we need the vertex
574 * declaration the shader will be used with in order to determine if
575 * the data in a register is of type D3DCOLOR, and needs swizzling. */
576 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
578 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
579 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
581 shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
583 TRACE("(%p) : pFunction %p\n", iface, pFunction);
585 /* First pass: trace shader */
586 shader_trace_init((IWineD3DBaseShader*) This, pFunction);
587 vshader_set_limits(This);
589 /* Initialize immediate constant lists */
590 list_init(&This->baseShader.constantsF);
591 list_init(&This->baseShader.constantsB);
592 list_init(&This->baseShader.constantsI);
594 /* Second pass: figure out registers used, semantics, etc.. */
/* min starts at the constant limit and max at 0, so min > max means "none seen". */
595 This->min_rel_offset = GL_LIMITS(vshader_constantsF);
596 This->max_rel_offset = 0;
597 memset(reg_maps, 0, sizeof(shader_reg_maps));
598 hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
599 This->semantics_in, This->semantics_out, pFunction, NULL);
600 if (hr != WINED3D_OK) return hr;
602 This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
604 if(deviceImpl->vs_selected_mode == SHADER_ARB &&
605 (GLINFO_LOCATION).arb_vs_offset_limit &&
606 This->min_rel_offset <= This->max_rel_offset) {
608 if(This->max_rel_offset - This->min_rel_offset > 127) {
609 FIXME("The difference between the minimum and maximum relative offset is > 127\n");
610 FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
611 FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
612 } else if(This->max_rel_offset - This->min_rel_offset > 63) {
613 This->rel_offset = This->min_rel_offset + 63;
614 } else if(This->max_rel_offset > 63) {
615 This->rel_offset = This->min_rel_offset;
617 This->rel_offset = 0;
620 This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
622 /* copy the function ... because it will certainly be released by application */
623 if (NULL != pFunction) {
626 function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
627 if (!function) return E_OUTOFMEMORY;
628 memcpy(function, pFunction, This->baseShader.functionLength);
629 This->baseShader.function = function;
631 This->baseShader.function = NULL;
/* Registers a fake input semantic for each element of a vertex declaration.
 *
 * Iterates over the first declarationWNumElements - 1 entries of
 * vdecl->pDeclarationWine (the last entry is skipped) and passes each
 * element's Reg, Usage and UsageIndex to vshader_set_input().
 *
 * NOTE(review): the declaration of `i` is not visible in this excerpt. If
 * declarationWNumElements is unsigned and can be 0, the `- 1` wraps and the
 * loop runs far past the array - confirm the count is always at least 1. */
637 /* Preload semantics for d3d8 shaders */
638 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
639 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
640 IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
643 for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
644 WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
645 vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
/* Adds float constants to the shader's local constantsF list.
 *
 * start_idx - index of the first constant.
 * src_data  - source floats, four per constant.
 * count     - number of constants requested.
 *
 * The end index start_idx + count is clamped to
 * GL_LIMITS(vshader_constantsF) with a WARN. For every index in range a
 * local_constant is heap-allocated, four floats are copied into its value
 * member and the entry is added to the head of baseShader.constantsF.
 * Returns E_OUTOFMEMORY when an allocation fails.
 *
 * NOTE(review): the declarations of `i` and `end_idx`, the lines between the
 * allocation check and the memcpy (presumably storing the index in the entry)
 * and the final return are not visible in this excerpt.
 * NOTE(review): start_idx + count can wrap for large inputs, and entries
 * added before a failed allocation stay on the list - confirm both are
 * acceptable. */
649 /* Set local constants for d3d8 shaders */
650 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
651 UINT start_idx, const float *src_data, UINT count) {
652 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
655 TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
657 end_idx = start_idx + count;
658 if (end_idx > GL_LIMITS(vshader_constantsF)) {
659 WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
660 end_idx = GL_LIMITS(vshader_constantsF);
663 for (i = start_idx; i < end_idx; ++i) {
664 local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
665 if (!lconst) return E_OUTOFMEMORY;
668 memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
669 list_add_head(&This->baseShader.constantsF, &lconst->entry);
/* Detailed comparison of the shader's swizzled attributes with a declaration.
 *
 * For every declaration element and every input register flagged in
 * reg_maps.attributes, the register's usage and usage index are decoded and
 * compared with the element's Usage/UsageIndex. For the pair under test,
 * This->swizzled_attribs is searched for the same usage/index, and a TRACE is
 * emitted when
 *   - the attribute is not in the list but the element type is now
 *     WINED3DDECLTYPE_D3DCOLOR, or
 *   - the attribute is in the list but the element type is no longer
 *     WINED3DDECLTYPE_D3DCOLOR.
 *
 * NOTE(review): the local declarations, the handling of non-matching
 * usage/index pairs, the assignment of `found` and all return statements are
 * not visible in this excerpt - presumably TRUE is returned after either
 * TRACE and FALSE otherwise; confirm. */
675 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
683 for(i = 0; i < vdecl->declarationWNumElements; i++) {
684 for(j = 0; j < MAX_ATTRIBS; j++) {
685 if(!This->baseShader.reg_maps.attributes[j]) continue;
687 usage_token = This->semantics_in[j].usage;
688 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
689 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
691 if(vdecl->pDeclarationWine[i].Usage != usage ||
692 vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
697 for(k = 0; k < This->num_swizzled_attribs; k++) {
698 if(This->swizzled_attribs[k].usage == usage &&
699 This->swizzled_attribs[k].idx == usage_idx) {
703 if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
704 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
705 debug_d3ddeclusage(usage), usage_idx);
708 if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
709 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
710 debug_d3ddeclusage(usage), usage_idx);
/* Compiles the shader for the current vertex declaration, recompiling only
 * when the D3DCOLOR (swizzled) inputs require it.
 *
 * When the shader is already compiled, its swizzled attribute list is first
 * compared cheaply (count and memcmp) with the list of the device's current
 * vertex declaration. Only if those differ is swizzled_attribs_differ()
 * consulted. A recompile is announced with a WARN, counted in
 * recompile_count (a FIXME is logged once the count has reached 50), and the
 * old program is dropped through the shader backend's shader_destroy.
 * The program is then generated with IWineD3DVertexShaderImpl_GenerateShader
 * and is_compiled is set to TRUE.
 *
 * NOTE(review): the else branches, early returns and the final return are not
 * visible in this excerpt, so the exact path taken in the "no recompile
 * needed" case has to be confirmed against the full source. */
718 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
719 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
720 IWineD3DVertexDeclarationImpl *vdecl;
721 CONST DWORD *function = This->baseShader.function;
722 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
724 TRACE("(%p) : function %p\n", iface, function);
726 /* We're already compiled. */
727 if (This->baseShader.is_compiled) {
728 vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
/* Cheap test first: same count and identical bytes means nothing changed. */
730 if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
731 memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
733 /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
734 * we have to recompile, but we have to take a deeper look at see if the attribs that differ
735 * are declared in the decl and used in the shader
737 if(swizzled_attribs_differ(This, vdecl)) {
738 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
741 WARN("Swizzled attribute validation required an expensive comparison\n");
747 if(This->recompile_count < 50) {
748 This->recompile_count++;
750 FIXME("Vertexshader %p recompiled more than 50 times\n", This);
753 deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
756 /* We don't need to compile */
758 This->baseShader.is_compiled = TRUE;
762 /* Generate the HW shader */
763 TRACE("(%p) : Generating hardware program\n", This);
764 IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
766 This->baseShader.is_compiled = TRUE;
/* Method table of the vertex shader object. The entries are positional, so
 * their order has to follow the member order of IWineD3DVertexShaderVtbl
 * (declared outside this excerpt).
 * NOTE(review): the last entry is spelled IWIneD3D... (capital I after W);
 * it matches the function's definition in this file, so renaming one
 * requires renaming the other. */
771 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
773 /*** IUnknown methods ***/
774 IWineD3DVertexShaderImpl_QueryInterface,
775 IWineD3DVertexShaderImpl_AddRef,
776 IWineD3DVertexShaderImpl_Release,
777 /*** IWineD3DBase methods ***/
778 IWineD3DVertexShaderImpl_GetParent,
779 /*** IWineD3DBaseShader methods ***/
780 IWineD3DVertexShaderImpl_SetFunction,
781 IWineD3DVertexShaderImpl_CompileShader,
782 /*** IWineD3DVertexShader methods ***/
783 IWineD3DVertexShaderImpl_GetDevice,
784 IWineD3DVertexShaderImpl_GetFunction,
785 IWineD3DVertexShaderImpl_FakeSemantics,
786 IWIneD3DVertexShaderImpl_SetLocalConstantsF