user32: Fix Win64 warnings.
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with this library; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24  */
25
26 #include "config.h"
27
28 #include <math.h>
29 #include <stdio.h>
30
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34
/* Expands to the gl_info member of the adapter of the device stored in
 * This->baseShader.device. The expansion refers to a local variable named
 * "This", so any code using this macro needs such a variable in scope. */
35 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
36
37 /* Shader debugging - change the #if below to 1 to enable debugging of software
38       vertex shaders. When disabled, both macros expand to nothing.              */
39 #if 0 /* Must not be 1 in cvs version */
40 # define VSTRACE(A) TRACE A
41 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
42 #else
43 # define VSTRACE(A)
44 # define TRACE_VSVECTOR(name)
45 #endif
46
47 /**
48  * NVIDIA: DX8 Vertex Shader to NV Vertex Program
49  *  http://developer.nvidia.com/view.asp?IO=vstovp
50  *
51  * NVIDIA: Memory Management with VAR
52  *  http://developer.nvidia.com/view.asp?IO=var_memory_management
53  */
54
55 /* TODO: Vertex and pixel shaders are almost identical; the only exceptions are the way some of the data is looked up and the availability of some of the data, i.e. some instructions are only valid for pshaders and some only for vshaders.
56 Because of this, the bulk of the software pipeline can be shared between pixel and vertex shaders, and it wouldn't surprise me if the program could be cross-compiled using a large body of shared code. */
57
/* Sentinel (non-NULL, non-string) pointer value used in the GL-name column of
 * the opcode table below for defb, defi, breakp and setp.
 * NOTE(review): judging by the name it marks opcodes that need the GLSL
 * backend - confirm against the code that consumes the table. */
58 #define GLNAME_REQUIRE_GLSL  ((const char *)1)
59
/* Vertex shader opcode table. Each entry lists, in order: the WINED3DSIO_*
 * opcode, its D3D assembly mnemonic, a GL instruction name (NULL or
 * GLNAME_REQUIRE_GLSL when there is none), two integers (presumably a
 * destination-token flag and the parameter count - confirm against the
 * SHADER_OPCODE declaration), one vshader_hw_ or shader_hw_ handler, one
 * shader_glsl_ handler, and two shader version bounds. The table ends with an
 * all-zero entry. */
60 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
61     /* This table is not order or position dependent. */
62
63     /* Arithmetic */
64     {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, vshader_hw_map2gl,   NULL, 0, 0},
65     {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, vshader_hw_map2gl,   shader_glsl_mov, 0, 0},
66     {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, vshader_hw_map2gl,   shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
67     {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
68     {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
69     {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, vshader_hw_map2gl,   shader_glsl_mad, 0, 0},
70     {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
71     {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
72     {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rsq, 0, 0},
73     {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
74     {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
75     {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
76     {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
77     {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
78     {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
79     {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
80     {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
81     {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
82     {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, vshader_hw_map2gl,   shader_glsl_expp, 0, 0},
83     {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
84     {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, vshader_hw_map2gl,   shader_glsl_lit, 0, 0},
85     {WINED3DSIO_DST,    "dst",  "DST", 1, 3, vshader_hw_map2gl,   shader_glsl_dst, 0, 0},
86     {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
87     {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
88     {WINED3DSIO_POW,    "pow",  "POW", 1, 3, vshader_hw_map2gl,   shader_glsl_pow, 0, 0},
89     {WINED3DSIO_CRS,    "crs",  "XPD", 1, 3, vshader_hw_map2gl,   shader_glsl_cross, 0, 0},
90     /* TODO: sgn can possibly be performed as
91         RCP tmp, vec
92         MUL out, tmp, vec */
93     {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
94     {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
95     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
96     {WINED3DSIO_SINCOS, "sincos",  "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
97     /* Matrix */
98     {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
99     {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
100     {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
101     {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
102     {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
103     /* Declare registers */
104     {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
105     /* Constant definitions */
106     {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
107     {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
108     {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
109     /* Flow control - requires GLSL or software shaders */
110     {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
111     {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
112     {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
113     {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
114     {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
115     {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
116     {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
117     {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
118     {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
119     {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
120     {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
121     {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
122     {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
123     {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
124     {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},
125
126     {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
127     {WINED3DSIO_TEXLDL, "texldl",   NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
128     {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
129 };
130
131 static void vshader_set_limits(
132       IWineD3DVertexShaderImpl *This) {
133
134       This->baseShader.limits.texcoord = 0;
135       This->baseShader.limits.attributes = 16;
136       This->baseShader.limits.packed_input = 0;
137
138       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
139       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
140
141       switch (This->baseShader.hex_version) {
142           case WINED3DVS_VERSION(1,0):
143           case WINED3DVS_VERSION(1,1):
144                    This->baseShader.limits.temporary = 12;
145                    This->baseShader.limits.constant_bool = 0;
146                    This->baseShader.limits.constant_int = 0;
147                    This->baseShader.limits.address = 1;
148                    This->baseShader.limits.packed_output = 0;
149                    This->baseShader.limits.sampler = 0;
150                    This->baseShader.limits.label = 0;
151                    break;
152       
153           case WINED3DVS_VERSION(2,0):
154           case WINED3DVS_VERSION(2,1):
155                    This->baseShader.limits.temporary = 12;
156                    This->baseShader.limits.constant_bool = 16;
157                    This->baseShader.limits.constant_int = 16;
158                    This->baseShader.limits.address = 1;
159                    This->baseShader.limits.packed_output = 0;
160                    This->baseShader.limits.sampler = 0;
161                    This->baseShader.limits.label = 16;
162                    break;
163
164           case WINED3DVS_VERSION(3,0):
165                    This->baseShader.limits.temporary = 32;
166                    This->baseShader.limits.constant_bool = 32;
167                    This->baseShader.limits.constant_int = 32;
168                    This->baseShader.limits.address = 1;
169                    This->baseShader.limits.packed_output = 12;
170                    This->baseShader.limits.sampler = 4;
171                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
172                    break;
173
174           default: This->baseShader.limits.temporary = 12;
175                    This->baseShader.limits.constant_bool = 16;
176                    This->baseShader.limits.constant_int = 16;
177                    This->baseShader.limits.address = 1;
178                    This->baseShader.limits.packed_output = 0;
179                    This->baseShader.limits.sampler = 0;
180                    This->baseShader.limits.label = 16;
181                    FIXME("Unrecognized vertex shader version %#x\n",
182                        This->baseShader.hex_version);
183       }
184 }
185
186 /* This is an internal function,
187  * used to create fake semantics for shaders
188  * that don't have them - d3d8 shaders where the declaration
189  * stores the register for each input
190  */
191 static void vshader_set_input(
192     IWineD3DVertexShaderImpl* This,
193     unsigned int regnum,
194     BYTE usage, BYTE usage_idx) {
195
196     /* Fake usage: set reserved bit, usage, usage_idx */
197     DWORD usage_token = (0x1 << 31) |
198         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
199
200     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
201     DWORD reg_token = (0x1 << 31) |
202         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
203
204     This->semantics_in[regnum].usage = usage_token;
205     This->semantics_in[regnum].reg = reg_token;
206 }
207
208 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
209     if (usage_idx1 != usage_idx2) return FALSE;
210     if (usage1 == usage2) return TRUE;
211     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
212     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
213
214     return FALSE;
215 }
216
217 BOOL vshader_get_input(
218     IWineD3DVertexShader* iface,
219     BYTE usage_req, BYTE usage_idx_req,
220     unsigned int* regnum) {
221
222     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
223     int i;
224
225     for (i = 0; i < MAX_ATTRIBS; i++) {
226         DWORD usage_token = This->semantics_in[i].usage;
227         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
228         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
229
230         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
231             *regnum = i;
232             return TRUE;
233         }
234     }
235     return FALSE;
236 }
237
238 BOOL vshader_input_is_color(
239     IWineD3DVertexShader* iface,
240     unsigned int regnum) {
241
242     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
243
244     DWORD usage_token = This->semantics_in[regnum].usage;
245     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
246     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
247
248     int i;
249
250     for(i = 0; i < This->num_swizzled_attribs; i++) {
251         if(This->swizzled_attribs[i].usage == usage &&
252            This->swizzled_attribs[i].idx == usage_idx) {
253             return TRUE;
254         }
255     }
256     return FALSE;
257 }
258
259 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
260     UINT num = 0, i, j;
261     UINT numoldswizzles = This->num_swizzled_attribs;
262     IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
263
264     DWORD usage_token, usage, usage_idx;
265     BOOL found;
266
267     attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
268
269     /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
270     memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
271
272     memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
273
274     for(i = 0; i < decl->num_swizzled_attribs; i++) {
275         for(j = 0; j < MAX_ATTRIBS; j++) {
276
277             if(!This->baseShader.reg_maps.attributes[j]) continue;
278
279             usage_token = This->semantics_in[j].usage;
280             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
281             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
282
283             if(decl->swizzled_attribs[i].usage == usage &&
284                decl->swizzled_attribs[i].idx == usage_idx) {
285                 This->swizzled_attribs[num].usage = usage;
286                 This->swizzled_attribs[num].idx = usage_idx;
287                 num++;
288             }
289         }
290     }
291
292     /* Add previously converted attributes back in if they are not defined in the current declaration */
293     for(i = 0; i < numoldswizzles; i++) {
294
295         found = FALSE;
296         for(j = 0; j < decl->declarationWNumElements; j++) {
297             if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
298                oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
299                 found = TRUE;
300             }
301         }
302         if(found) {
303             /* This previously converted attribute is declared in the current declaration. Either it is
304              * already in the new array, or it should not be there. Skip it
305              */
306             continue;
307         }
308         /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
309          * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
310          * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
311          * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
312          * stays unswizzled as well because it isn't found in the oldswizzles array
313          */
314         for(j = 0; j < num; j++) {
315             if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
316                oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
317                oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
318                 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
319                          sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
320                 break;
321             }
322         }
323         This->swizzled_attribs[j].usage = oldswizzles[i].usage;
324         This->swizzled_attribs[j].idx = oldswizzles[i].idx;
325         num++;
326     }
327
328     TRACE("New swizzled attributes array\n");
329     for(i = 0; i < num; i++) {
330         TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
331               This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
332     }
333     This->num_swizzled_attribs = num;
334 }
335 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
336     or GLSL and send it to the card */
337 static VOID IWineD3DVertexShaderImpl_GenerateShader(
338     IWineD3DVertexShader *iface,
339     shader_reg_maps* reg_maps,
340     CONST DWORD *pFunction) {
341
342     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
343     IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
344     SHADER_BUFFER buffer;
345
346     find_swizzled_attribs(decl, This);
347
348 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
349         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
350     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
351         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
352         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
353         This->fixupVertexBufferSize = PGMSIZE;
354         This->fixupVertexBuffer[0] = 0;
355     }
356     buffer.buffer = This->device->fixupVertexBuffer;
357 #else
358     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
359 #endif
360     buffer.bsize = 0;
361     buffer.lineNo = 0;
362     buffer.newline = TRUE;
363
364     ((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_vshader(iface, &buffer);
365
366 #if 1 /* if were using the data buffer of device then we don't need to free it */
367   HeapFree(GetProcessHeap(), 0, buffer.buffer);
368 #endif
369 }
370
371 /* *******************************************
372    IWineD3DVertexShader IUnknown parts follow
373    ******************************************* */
374 static HRESULT  WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
375     return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
376 }
377
378 static ULONG  WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
379     return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
380 }
381
382 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
383     return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
384 }
385
386 /* *******************************************
387    IWineD3DVertexShader IWineD3DVertexShader parts follow
388    ******************************************* */
389
390 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
391     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
392     
393     *parent = This->parent;
394     IUnknown_AddRef(*parent);
395     TRACE("(%p) : returning %p\n", This, *parent);
396     return WINED3D_OK;
397 }
398
399 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
400     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
401     IWineD3DDevice_AddRef(This->baseShader.device);
402     *pDevice = This->baseShader.device;
403     TRACE("(%p) returning %p\n", This, *pDevice);
404     return WINED3D_OK;
405 }
406
407 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
408     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
409     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
410
411     if (NULL == pData) {
412         *pSizeOfData = This->baseShader.functionLength;
413         return WINED3D_OK;
414     }
415     if (*pSizeOfData < This->baseShader.functionLength) {
416         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
417          * than the required size we should write the required size and
418          * return D3DERR_MOREDATA. That's not actually true. */
419         return WINED3DERR_INVALIDCALL;
420     }
421     if (NULL == This->baseShader.function) { /* no function defined */
422         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
423         (*(DWORD **) pData) = NULL;
424     } else {
425         if(This->baseShader.functionLength == 0){
426
427         }
428         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
429         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
430     }
431     return WINED3D_OK;
432 }
433
434 /* Note that for vertex shaders CompileShader isn't called until the
435  * shader is first used. The reason for this is that we need the vertex
436  * declaration the shader will be used with in order to determine if
437  * the data in a register is of type D3DCOLOR, and needs swizzling. */
438 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
439
440     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
441     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
442     HRESULT hr;
443     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
444
445     TRACE("(%p) : pFunction %p\n", iface, pFunction);
446
447     /* First pass: trace shader */
448     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
449     vshader_set_limits(This);
450
451     /* Initialize immediate constant lists */
452     list_init(&This->baseShader.constantsF);
453     list_init(&This->baseShader.constantsB);
454     list_init(&This->baseShader.constantsI);
455
456     /* Second pass: figure out registers used, semantics, etc.. */
457     This->min_rel_offset = GL_LIMITS(vshader_constantsF);
458     This->max_rel_offset = 0;
459     memset(reg_maps, 0, sizeof(shader_reg_maps));
460     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
461        This->semantics_in, This->semantics_out, pFunction, NULL);
462     if (hr != WINED3D_OK) return hr;
463
464     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
465
466     if(deviceImpl->vs_selected_mode == SHADER_ARB &&
467        (GLINFO_LOCATION).arb_vs_offset_limit      &&
468        This->min_rel_offset <= This->max_rel_offset) {
469
470         if(This->max_rel_offset - This->min_rel_offset > 127) {
471             FIXME("The difference between the minimum and maximum relative offset is > 127\n");
472             FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
473             FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
474         } else if(This->max_rel_offset - This->min_rel_offset > 63) {
475             This->rel_offset = This->min_rel_offset + 63;
476         } else if(This->max_rel_offset > 63) {
477             This->rel_offset = This->min_rel_offset;
478         } else {
479             This->rel_offset = 0;
480         }
481     }
482     This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
483
484     /* copy the function ... because it will certainly be released by application */
485     if (NULL != pFunction) {
486         void *function;
487
488         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
489         if (!function) return E_OUTOFMEMORY;
490         memcpy(function, pFunction, This->baseShader.functionLength);
491         This->baseShader.function = function;
492     } else {
493         This->baseShader.function = NULL;
494     }
495
496     return WINED3D_OK;
497 }
498
499 /* Preload semantics for d3d8 shaders */
500 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
501     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
502     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
503
504     int i;
505     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
506         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
507         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
508     }
509 }
510
511 /* Set local constants for d3d8 shaders */
512 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
513         UINT start_idx, const float *src_data, UINT count) {
514     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
515     UINT i, end_idx;
516
517     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
518
519     end_idx = start_idx + count;
520     if (end_idx > GL_LIMITS(vshader_constantsF)) {
521         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
522         end_idx = GL_LIMITS(vshader_constantsF);
523     }
524
525     for (i = start_idx; i < end_idx; ++i) {
526         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
527         if (!lconst) return E_OUTOFMEMORY;
528
529         lconst->idx = i;
530         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
531         list_add_head(&This->baseShader.constantsF, &lconst->entry);
532     }
533
534     return WINED3D_OK;
535 }
536
537 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
538     UINT i, j, k;
539     BOOL found;
540
541     DWORD usage_token;
542     DWORD usage;
543     DWORD usage_idx;
544
545     for(i = 0; i < vdecl->declarationWNumElements; i++) {
546         /* Ignore tesselated streams and the termination entry(position0, stream 255, unused) */
547         if(vdecl->pDeclarationWine[i].Stream >= MAX_STREAMS ||
548            vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_UNUSED) continue;
549
550         for(j = 0; j < MAX_ATTRIBS; j++) {
551             if(!This->baseShader.reg_maps.attributes[j]) continue;
552
553             usage_token = This->semantics_in[j].usage;
554             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
555             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
556
557             if(vdecl->pDeclarationWine[i].Usage != usage ||
558                vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
559                 continue;
560             }
561
562             found = FALSE;
563             for(k = 0; k < This->num_swizzled_attribs; k++) {
564                 if(This->swizzled_attribs[k].usage == usage &&
565                     This->swizzled_attribs[k].idx == usage_idx) {
566                     found = TRUE;
567                 }
568             }
569             if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
570                 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
571                       debug_d3ddeclusage(usage), usage_idx);
572                 return TRUE;
573             }
574             if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
575                 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
576                       debug_d3ddeclusage(usage), usage_idx);
577                 return TRUE;
578             }
579         }
580     }
581     return FALSE;
582 }
583
584 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
585     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
586     IWineD3DVertexDeclarationImpl *vdecl;
587     CONST DWORD *function = This->baseShader.function;
588     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
589
590     TRACE("(%p) : function %p\n", iface, function);
591
592     /* We're already compiled. */
593     if (This->baseShader.is_compiled) {
594         vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
595
596         if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
597            memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
598
599             /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
600              * we have to recompile, but we have to take a deeper look at see if the attribs that differ
601              * are declared in the decl and used in the shader
602              */
603             if(swizzled_attribs_differ(This, vdecl)) {
604                 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
605                 goto recompile;
606             }
607             WARN("Swizzled attribute validation required an expensive comparison\n");
608         }
609
610         return WINED3D_OK;
611
612         recompile:
613         if(This->recompile_count < 50) {
614             This->recompile_count++;
615         } else {
616             FIXME("Vertexshader %p recompiled more than 50 times\n", This);
617         }
618
619         deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
620     }
621
622     /* We don't need to compile */
623     if (!function) {
624         This->baseShader.is_compiled = TRUE;
625         return WINED3D_OK;
626     }
627
628     /* Generate the HW shader */
629     TRACE("(%p) : Generating hardware program\n", This);
630     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
631
632     This->baseShader.is_compiled = TRUE;
633
634     return WINED3D_OK;
635 }
636
/* Method table of the vertex shader object. The initializers are positional,
 * so their order has to match the member order of IWineD3DVertexShaderVtbl
 * (declared outside this file). */
637 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
638 {
639     /*** IUnknown methods ***/
640     IWineD3DVertexShaderImpl_QueryInterface,
641     IWineD3DVertexShaderImpl_AddRef,
642     IWineD3DVertexShaderImpl_Release,
643     /*** IWineD3DBase methods ***/
644     IWineD3DVertexShaderImpl_GetParent,
645     /*** IWineD3DBaseShader methods ***/
646     IWineD3DVertexShaderImpl_SetFunction,
647     IWineD3DVertexShaderImpl_CompileShader,
648     /*** IWineD3DVertexShader methods ***/
649     IWineD3DVertexShaderImpl_GetDevice,
650     IWineD3DVertexShaderImpl_GetFunction,
651     IWineD3DVertexShaderImpl_FakeSemantics,
652     IWIneD3DVertexShaderImpl_SetLocalConstantsF
653 };