comctl32/tests: Destroy the window after the tests.
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with this library; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24  */
25
26 #include "config.h"
27
28 #include <math.h>
29 #include <stdio.h>
30
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34
35 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
36
37 /* TODO: Vertex and pixel shaders are almost identical. The only differences are the way some of the data is looked up and the availability of some of the data, i.e. some instructions are only valid for pshaders and some only for vshaders.
38  * Because of this, the bulk of the software pipeline can be shared between pixel and vertex shaders, and it wouldn't be surprising if the program could be cross-compiled using a large body of shared code. */
39
40 #define GLNAME_REQUIRE_GLSL  ((const char *)1)
41
42 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
43     /* Vertex shader opcode table. It is not order or position dependent; the last entry is all zero/NULL. */
44     /* Columns appear to be: opcode token, mnemonic, ARB instruction name (NULL, "undefined" or GLNAME_REQUIRE_GLSL where there is none), destination flag, parameter count, handler index, minimum and maximum shader version - TODO confirm against the SHADER_OPCODE definition. */
45     /* Arithmetic */
46     {WINED3DSIO_NOP,     "nop",     "NOP",               0, 0, WINED3DSIH_NOP,     0,                      0                     },
47     {WINED3DSIO_MOV,     "mov",     "MOV",               1, 2, WINED3DSIH_MOV,     0,                      0                     },
48     {WINED3DSIO_MOVA,    "mova",    NULL,                1, 2, WINED3DSIH_MOVA,    WINED3DVS_VERSION(2,0), -1                    },
49     {WINED3DSIO_ADD,     "add",     "ADD",               1, 3, WINED3DSIH_ADD,     0,                      0                     },
50     {WINED3DSIO_SUB,     "sub",     "SUB",               1, 3, WINED3DSIH_SUB,     0,                      0                     },
51     {WINED3DSIO_MAD,     "mad",     "MAD",               1, 4, WINED3DSIH_MAD,     0,                      0                     },
52     {WINED3DSIO_MUL,     "mul",     "MUL",               1, 3, WINED3DSIH_MUL,     0,                      0                     },
53     {WINED3DSIO_RCP,     "rcp",     "RCP",               1, 2, WINED3DSIH_RCP,     0,                      0                     },
54     {WINED3DSIO_RSQ,     "rsq",     "RSQ",               1, 2, WINED3DSIH_RSQ,     0,                      0                     },
55     {WINED3DSIO_DP3,     "dp3",     "DP3",               1, 3, WINED3DSIH_DP3,     0,                      0                     },
56     {WINED3DSIO_DP4,     "dp4",     "DP4",               1, 3, WINED3DSIH_DP4,     0,                      0                     },
57     {WINED3DSIO_MIN,     "min",     "MIN",               1, 3, WINED3DSIH_MIN,     0,                      0                     },
58     {WINED3DSIO_MAX,     "max",     "MAX",               1, 3, WINED3DSIH_MAX,     0,                      0                     },
59     {WINED3DSIO_SLT,     "slt",     "SLT",               1, 3, WINED3DSIH_SLT,     0,                      0                     },
60     {WINED3DSIO_SGE,     "sge",     "SGE",               1, 3, WINED3DSIH_SGE,     0,                      0                     },
61     {WINED3DSIO_ABS,     "abs",     "ABS",               1, 2, WINED3DSIH_ABS,     0,                      0                     },
62     {WINED3DSIO_EXP,     "exp",     "EX2",               1, 2, WINED3DSIH_EXP,     0,                      0                     },
63     {WINED3DSIO_LOG,     "log",     "LG2",               1, 2, WINED3DSIH_LOG,     0,                      0                     },
64     {WINED3DSIO_EXPP,    "expp",    "EXP",               1, 2, WINED3DSIH_EXPP,    0,                      0                     },
65     {WINED3DSIO_LOGP,    "logp",    "LOG",               1, 2, WINED3DSIH_LOGP,    0,                      0                     },
66     {WINED3DSIO_LIT,     "lit",     "LIT",               1, 2, WINED3DSIH_LIT,     0,                      0                     },
67     {WINED3DSIO_DST,     "dst",     "DST",               1, 3, WINED3DSIH_DST,     0,                      0                     },
68     {WINED3DSIO_LRP,     "lrp",     "LRP",               1, 4, WINED3DSIH_LRP,     0,                      0                     },
69     {WINED3DSIO_FRC,     "frc",     "FRC",               1, 2, WINED3DSIH_FRC,     0,                      0                     },
70     {WINED3DSIO_POW,     "pow",     "POW",               1, 3, WINED3DSIH_POW,     0,                      0                     },
71     {WINED3DSIO_CRS,     "crs",     "XPD",               1, 3, WINED3DSIH_CRS,     0,                      0                     },
72     /* TODO: sgn can possibly be performed as
73         RCP tmp, vec
74         MUL out, tmp, vec */
75     {WINED3DSIO_SGN,     "sgn",     NULL,                1, 2, WINED3DSIH_SGN,     0,                      0                     },
76     {WINED3DSIO_NRM,     "nrm",     NULL,                1, 2, WINED3DSIH_NRM,     0,                      0                     },
77     {WINED3DSIO_SINCOS,  "sincos",  NULL,                1, 4, WINED3DSIH_SINCOS,  WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
78     {WINED3DSIO_SINCOS,  "sincos",  "SCS",               1, 2, WINED3DSIH_SINCOS,  WINED3DVS_VERSION(3,0), -1                    },
79     /* Matrix */
80     {WINED3DSIO_M4x4,    "m4x4",    "undefined",         1, 3, WINED3DSIH_M4x4,    0,                      0                     },
81     {WINED3DSIO_M4x3,    "m4x3",    "undefined",         1, 3, WINED3DSIH_M4x3,    0,                      0                     },
82     {WINED3DSIO_M3x4,    "m3x4",    "undefined",         1, 3, WINED3DSIH_M3x4,    0,                      0                     },
83     {WINED3DSIO_M3x3,    "m3x3",    "undefined",         1, 3, WINED3DSIH_M3x3,    0,                      0                     },
84     {WINED3DSIO_M3x2,    "m3x2",    "undefined",         1, 3, WINED3DSIH_M3x2,    0,                      0                     },
85     /* Declare registers */
86     {WINED3DSIO_DCL,     "dcl",     NULL,                0, 2, WINED3DSIH_DCL,     0,                      0                     },
87     /* Constant definitions */
88     {WINED3DSIO_DEF,     "def",     NULL,                1, 5, WINED3DSIH_DEF,     0,                      0                     },
89     {WINED3DSIO_DEFB,    "defb",    GLNAME_REQUIRE_GLSL, 1, 2, WINED3DSIH_DEFB,    0,                      0                     },
90     {WINED3DSIO_DEFI,    "defi",    GLNAME_REQUIRE_GLSL, 1, 5, WINED3DSIH_DEFI,    0,                      0                     },
91     /* Flow control - requires GLSL or software shaders */
92     {WINED3DSIO_REP ,    "rep",     NULL,                0, 1, WINED3DSIH_REP,     WINED3DVS_VERSION(2,0), -1                    },
93     {WINED3DSIO_ENDREP,  "endrep",  NULL,                0, 0, WINED3DSIH_ENDREP,  WINED3DVS_VERSION(2,0), -1                    },
94     {WINED3DSIO_IF,      "if",      NULL,                0, 1, WINED3DSIH_IF,      WINED3DVS_VERSION(2,0), -1                    },
95     {WINED3DSIO_IFC,     "ifc",     NULL,                0, 2, WINED3DSIH_IFC,     WINED3DVS_VERSION(2,1), -1                    },
96     {WINED3DSIO_ELSE,    "else",    NULL,                0, 0, WINED3DSIH_ELSE,    WINED3DVS_VERSION(2,0), -1                    },
97     {WINED3DSIO_ENDIF,   "endif",   NULL,                0, 0, WINED3DSIH_ENDIF,   WINED3DVS_VERSION(2,0), -1                    },
98     {WINED3DSIO_BREAK,   "break",   NULL,                0, 0, WINED3DSIH_BREAK,   WINED3DVS_VERSION(2,1), -1                    },
99     {WINED3DSIO_BREAKC,  "breakc",  NULL,                0, 2, WINED3DSIH_BREAKC,  WINED3DVS_VERSION(2,1), -1                    },
100     {WINED3DSIO_BREAKP,  "breakp",  GLNAME_REQUIRE_GLSL, 0, 1, WINED3DSIH_BREAKP,  0,                      0                     },
101     {WINED3DSIO_CALL,    "call",    NULL,                0, 1, WINED3DSIH_CALL,    WINED3DVS_VERSION(2,0), -1                    },
102     {WINED3DSIO_CALLNZ,  "callnz",  NULL,                0, 2, WINED3DSIH_CALLNZ,  WINED3DVS_VERSION(2,0), -1                    },
103     {WINED3DSIO_LOOP,    "loop",    NULL,                0, 2, WINED3DSIH_LOOP,    WINED3DVS_VERSION(2,0), -1                    },
104     {WINED3DSIO_RET,     "ret",     NULL,                0, 0, WINED3DSIH_RET,     WINED3DVS_VERSION(2,0), -1                    },
105     {WINED3DSIO_ENDLOOP, "endloop", NULL,                0, 0, WINED3DSIH_ENDLOOP, WINED3DVS_VERSION(2,0), -1                    },
106     {WINED3DSIO_LABEL,   "label",   NULL,                0, 1, WINED3DSIH_LABEL,   WINED3DVS_VERSION(2,0), -1                    },
107
108     {WINED3DSIO_SETP,    "setp",    GLNAME_REQUIRE_GLSL, 1, 3, WINED3DSIH_SETP,    0,                      0                     },
109     {WINED3DSIO_TEXLDL,  "texldl",  NULL,                1, 3, WINED3DSIH_TEXLDL,  WINED3DVS_VERSION(3,0), -1                    },
110     {0,                  NULL,      NULL,                0, 0, 0,                  0,                      0                     }
111 };
112
113 static void vshader_set_limits(
114       IWineD3DVertexShaderImpl *This) {
115
116       This->baseShader.limits.texcoord = 0;
117       This->baseShader.limits.attributes = 16;
118       This->baseShader.limits.packed_input = 0;
119
120       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
121       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
122
123       switch (This->baseShader.reg_maps.shader_version)
124       {
125           case WINED3DVS_VERSION(1,0):
126           case WINED3DVS_VERSION(1,1):
127                    This->baseShader.limits.temporary = 12;
128                    This->baseShader.limits.constant_bool = 0;
129                    This->baseShader.limits.constant_int = 0;
130                    This->baseShader.limits.address = 1;
131                    This->baseShader.limits.packed_output = 0;
132                    This->baseShader.limits.sampler = 0;
133                    This->baseShader.limits.label = 0;
134                    break;
135       
136           case WINED3DVS_VERSION(2,0):
137           case WINED3DVS_VERSION(2,1):
138                    This->baseShader.limits.temporary = 12;
139                    This->baseShader.limits.constant_bool = 16;
140                    This->baseShader.limits.constant_int = 16;
141                    This->baseShader.limits.address = 1;
142                    This->baseShader.limits.packed_output = 0;
143                    This->baseShader.limits.sampler = 0;
144                    This->baseShader.limits.label = 16;
145                    break;
146
147           case WINED3DVS_VERSION(3,0):
148                    This->baseShader.limits.temporary = 32;
149                    This->baseShader.limits.constant_bool = 32;
150                    This->baseShader.limits.constant_int = 32;
151                    This->baseShader.limits.address = 1;
152                    This->baseShader.limits.packed_output = 12;
153                    This->baseShader.limits.sampler = 4;
154                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
155                    break;
156
157           default: This->baseShader.limits.temporary = 12;
158                    This->baseShader.limits.constant_bool = 16;
159                    This->baseShader.limits.constant_int = 16;
160                    This->baseShader.limits.address = 1;
161                    This->baseShader.limits.packed_output = 0;
162                    This->baseShader.limits.sampler = 0;
163                    This->baseShader.limits.label = 16;
164                    FIXME("Unrecognized vertex shader version %#x\n",
165                            This->baseShader.reg_maps.shader_version);
166       }
167 }
168
169 /* This is an internal function,
170  * used to create fake semantics for shaders
171  * that don't have them - d3d8 shaders where the declaration
172  * stores the register for each input
173  */
174 static void vshader_set_input(
175     IWineD3DVertexShaderImpl* This,
176     unsigned int regnum,
177     BYTE usage, BYTE usage_idx) {
178
179     /* Fake usage: set reserved bit, usage, usage_idx */
180     DWORD usage_token = (0x1 << 31) |
181         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
182
183     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
184     DWORD reg_token = (0x1 << 31) |
185         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
186
187     This->semantics_in[regnum].usage = usage_token;
188     This->semantics_in[regnum].reg = reg_token;
189 }
190
191 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
192     if (usage_idx1 != usage_idx2) return FALSE;
193     if (usage1 == usage2) return TRUE;
194     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
195     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
196
197     return FALSE;
198 }
199
200 BOOL vshader_get_input(
201     IWineD3DVertexShader* iface,
202     BYTE usage_req, BYTE usage_idx_req,
203     unsigned int* regnum) {
204
205     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
206     int i;
207
208     for (i = 0; i < MAX_ATTRIBS; i++) {
209         DWORD usage_token = This->semantics_in[i].usage;
210         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
211         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
212
213         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
214             *regnum = i;
215             return TRUE;
216         }
217     }
218     return FALSE;
219 }
220
221 BOOL vshader_input_is_color(
222     IWineD3DVertexShader* iface,
223     unsigned int regnum) {
224
225     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
226
227     DWORD usage_token = This->semantics_in[regnum].usage;
228     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
229     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
230
231     int i;
232
233     for(i = 0; i < This->num_swizzled_attribs; i++) {
234         if(This->swizzled_attribs[i].usage == usage &&
235            This->swizzled_attribs[i].idx == usage_idx) {
236             return TRUE;
237         }
238     }
239     return FALSE;
240 }
241
242 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
243     UINT num = 0, i, j;
244     UINT numoldswizzles = This->num_swizzled_attribs;
245     IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
246
247     DWORD usage_token, usage, usage_idx;
248     BOOL found;
249
250     attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
251
252     /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
253     memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
254
255     memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
256
257     for(i = 0; i < decl->num_swizzled_attribs; i++) {
258         for(j = 0; j < MAX_ATTRIBS; j++) {
259
260             if(!This->baseShader.reg_maps.attributes[j]) continue;
261
262             usage_token = This->semantics_in[j].usage;
263             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
264             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
265
266             if(decl->swizzled_attribs[i].usage == usage &&
267                decl->swizzled_attribs[i].idx == usage_idx) {
268                 This->swizzled_attribs[num].usage = usage;
269                 This->swizzled_attribs[num].idx = usage_idx;
270                 num++;
271             }
272         }
273     }
274
275     /* Add previously converted attributes back in if they are not defined in the current declaration */
276     for(i = 0; i < numoldswizzles; i++) {
277
278         found = FALSE;
279         for(j = 0; j < decl->declarationWNumElements; j++) {
280             if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
281                oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
282                 found = TRUE;
283             }
284         }
285         if(found) {
286             /* This previously converted attribute is declared in the current declaration. Either it is
287              * already in the new array, or it should not be there. Skip it
288              */
289             continue;
290         }
291         /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
292          * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
293          * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
294          * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
295          * stays unswizzled as well because it isn't found in the oldswizzles array
296          */
297         for(j = 0; j < num; j++) {
298             if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
299                oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
300                oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
301                 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
302                          sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
303                 break;
304             }
305         }
306         This->swizzled_attribs[j].usage = oldswizzles[i].usage;
307         This->swizzled_attribs[j].idx = oldswizzles[i].idx;
308         num++;
309     }
310
311     TRACE("New swizzled attributes array\n");
312     for(i = 0; i < num; i++) {
313         TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
314               This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
315     }
316     This->num_swizzled_attribs = num;
317 }
318 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
319     or GLSL and send it to the card */
320 static void IWineD3DVertexShaderImpl_GenerateShader(IWineD3DVertexShader *iface,
321         const struct shader_reg_maps* reg_maps, const DWORD *pFunction)
322 {
323     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
324     IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
325     SHADER_BUFFER buffer;
326
327     find_swizzled_attribs(decl, This);
328
329     shader_buffer_init(&buffer);
330     ((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_vshader(iface, &buffer);
331     shader_buffer_free(&buffer);
332 }
333
334 /* *******************************************
335    IWineD3DVertexShader IUnknown parts follow
336    ******************************************* */
337 static HRESULT  WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
338     TRACE("iface %p, riid %s, ppobj %p\n", iface, debugstr_guid(riid), ppobj);
339
340     if (IsEqualGUID(riid, &IID_IWineD3DVertexShader)
341             || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
342             || IsEqualGUID(riid, &IID_IWineD3DBase)
343             || IsEqualGUID(riid, &IID_IUnknown))
344     {
345         IUnknown_AddRef(iface);
346         *ppobj = iface;
347         return S_OK;
348     }
349
350     WARN("%s not implemented, returning E_NOINTERFACE\n", debugstr_guid(riid));
351
352     *ppobj = NULL;
353     return E_NOINTERFACE;
354 }
355
356 static ULONG  WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
357     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
358     ULONG refcount = InterlockedIncrement(&This->baseShader.ref);
359
360     TRACE("%p increasing refcount to %u\n", This, refcount);
361
362     return refcount;
363 }
364
365 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
366     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
367     ULONG refcount = InterlockedDecrement(&This->baseShader.ref);
368
369     TRACE("%p decreasing refcount to %u\n", This, refcount);
370
371     if (!refcount)
372     {
373         shader_cleanup((IWineD3DBaseShader *)iface);
374         HeapFree(GetProcessHeap(), 0, This);
375     }
376
377     return refcount;
378 }
379
380 /* *******************************************
381    IWineD3DVertexShader IWineD3DVertexShader parts follow
382    ******************************************* */
383
384 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
385     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
386     
387     *parent = This->parent;
388     IUnknown_AddRef(*parent);
389     TRACE("(%p) : returning %p\n", This, *parent);
390     return WINED3D_OK;
391 }
392
393 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
394     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
395     IWineD3DDevice_AddRef(This->baseShader.device);
396     *pDevice = This->baseShader.device;
397     TRACE("(%p) returning %p\n", This, *pDevice);
398     return WINED3D_OK;
399 }
400
401 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
402     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
403     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
404
405     if (NULL == pData) {
406         *pSizeOfData = This->baseShader.functionLength;
407         return WINED3D_OK;
408     }
409     if (*pSizeOfData < This->baseShader.functionLength) {
410         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
411          * than the required size we should write the required size and
412          * return D3DERR_MOREDATA. That's not actually true. */
413         return WINED3DERR_INVALIDCALL;
414     }
415     if (NULL == This->baseShader.function) { /* no function defined */
416         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
417         (*(DWORD **) pData) = NULL;
418     } else {
419         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
420         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
421     }
422     return WINED3D_OK;
423 }
424
425 /* Note that for vertex shaders CompileShader isn't called until the
426  * shader is first used. The reason for this is that we need the vertex
427  * declaration the shader will be used with in order to determine if
428  * the data in a register is of type D3DCOLOR, and needs swizzling. */
429 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
430
431     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
432     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
433     HRESULT hr;
434     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
435
436     TRACE("(%p) : pFunction %p\n", iface, pFunction);
437
438     /* First pass: trace shader */
439     if (TRACE_ON(d3d_shader)) shader_trace_init(pFunction, This->baseShader.shader_ins);
440
441     /* Initialize immediate constant lists */
442     list_init(&This->baseShader.constantsF);
443     list_init(&This->baseShader.constantsB);
444     list_init(&This->baseShader.constantsI);
445
446     /* Second pass: figure out registers used, semantics, etc.. */
447     This->min_rel_offset = GL_LIMITS(vshader_constantsF);
448     This->max_rel_offset = 0;
449     memset(reg_maps, 0, sizeof(shader_reg_maps));
450     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
451             This->semantics_in, This->semantics_out, pFunction);
452     if (hr != WINED3D_OK) return hr;
453
454     vshader_set_limits(This);
455
456     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
457
458     if(deviceImpl->vs_selected_mode == SHADER_ARB &&
459        (GLINFO_LOCATION).arb_vs_offset_limit      &&
460        This->min_rel_offset <= This->max_rel_offset) {
461
462         if(This->max_rel_offset - This->min_rel_offset > 127) {
463             FIXME("The difference between the minimum and maximum relative offset is > 127\n");
464             FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
465             FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
466         } else if(This->max_rel_offset - This->min_rel_offset > 63) {
467             This->rel_offset = This->min_rel_offset + 63;
468         } else if(This->max_rel_offset > 63) {
469             This->rel_offset = This->min_rel_offset;
470         } else {
471             This->rel_offset = 0;
472         }
473     }
474     This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
475
476     /* copy the function ... because it will certainly be released by application */
477     if (NULL != pFunction) {
478         void *function;
479
480         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
481         if (!function) return E_OUTOFMEMORY;
482         memcpy(function, pFunction, This->baseShader.functionLength);
483         This->baseShader.function = function;
484     } else {
485         This->baseShader.function = NULL;
486     }
487
488     return WINED3D_OK;
489 }
490
491 /* Preload semantics for d3d8 shaders */
492 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
493     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
494     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
495
496     int i;
497     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
498         const WINED3DVERTEXELEMENT *element = vdecl->pDeclarationWine + i;
499         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
500     }
501 }
502
503 /* Set local constants for d3d8 shaders */
504 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
505         UINT start_idx, const float *src_data, UINT count) {
506     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
507     UINT i, end_idx;
508
509     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
510
511     end_idx = start_idx + count;
512     if (end_idx > GL_LIMITS(vshader_constantsF)) {
513         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
514         end_idx = GL_LIMITS(vshader_constantsF);
515     }
516
517     for (i = start_idx; i < end_idx; ++i) {
518         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
519         if (!lconst) return E_OUTOFMEMORY;
520
521         lconst->idx = i;
522         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
523         list_add_head(&This->baseShader.constantsF, &lconst->entry);
524     }
525
526     return WINED3D_OK;
527 }
528
529 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
530     UINT i, j, k;
531     BOOL found;
532
533     DWORD usage_token;
534     DWORD usage;
535     DWORD usage_idx;
536
537     for(i = 0; i < vdecl->declarationWNumElements; i++) {
538         /* Ignore tesselated streams and the termination entry(position0, stream 255, unused) */
539         if(vdecl->pDeclarationWine[i].Stream >= MAX_STREAMS ||
540            vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_UNUSED) continue;
541
542         for(j = 0; j < MAX_ATTRIBS; j++) {
543             if(!This->baseShader.reg_maps.attributes[j]) continue;
544
545             usage_token = This->semantics_in[j].usage;
546             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
547             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
548
549             if(vdecl->pDeclarationWine[i].Usage != usage ||
550                vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
551                 continue;
552             }
553
554             found = FALSE;
555             for(k = 0; k < This->num_swizzled_attribs; k++) {
556                 if(This->swizzled_attribs[k].usage == usage &&
557                     This->swizzled_attribs[k].idx == usage_idx) {
558                     found = TRUE;
559                 }
560             }
561             if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
562                 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
563                       debug_d3ddeclusage(usage), usage_idx);
564                 return TRUE;
565             }
566             if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
567                 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
568                       debug_d3ddeclusage(usage), usage_idx);
569                 return TRUE;
570             }
571         }
572     }
573     return FALSE;
574 }
575
576 HRESULT IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
577     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
578     IWineD3DVertexDeclarationImpl *vdecl;
579     CONST DWORD *function = This->baseShader.function;
580     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
581
582     TRACE("(%p) : function %p\n", iface, function);
583
584     /* We're already compiled. */
585     if (This->baseShader.is_compiled) {
586         vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
587
588         if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
589            memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
590
591             /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
592              * we have to recompile, but we have to take a deeper look at see if the attribs that differ
593              * are declared in the decl and used in the shader
594              */
595             if(swizzled_attribs_differ(This, vdecl)) {
596                 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
597                 goto recompile;
598             }
599             WARN("Swizzled attribute validation required an expensive comparison\n");
600         }
601
602         return WINED3D_OK;
603
604         recompile:
605         if(This->recompile_count < 50) {
606             This->recompile_count++;
607         } else {
608             FIXME("Vertexshader %p recompiled more than 50 times\n", This);
609         }
610
611         deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
612     }
613
614     /* We don't need to compile */
615     if (!function) {
616         This->baseShader.is_compiled = TRUE;
617         return WINED3D_OK;
618     }
619
620     /* Generate the HW shader */
621     TRACE("(%p) : Generating hardware program\n", This);
622     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
623
624     This->baseShader.is_compiled = TRUE;
625
626     return WINED3D_OK;
627 }
628
629 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
630 { /* Method table of the vertex shader object. The initializer is positional, so the entries must stay in the order in which IWineD3DVertexShaderVtbl declares its members. */
631     /*** IUnknown methods ***/
632     IWineD3DVertexShaderImpl_QueryInterface,
633     IWineD3DVertexShaderImpl_AddRef,
634     IWineD3DVertexShaderImpl_Release,
635     /*** IWineD3DBase methods ***/
636     IWineD3DVertexShaderImpl_GetParent,
637     /*** IWineD3DBaseShader methods ***/
638     IWineD3DVertexShaderImpl_SetFunction,
639     /*** IWineD3DVertexShader methods ***/
640     IWineD3DVertexShaderImpl_GetDevice,
641     IWineD3DVertexShaderImpl_GetFunction,
642     IWineD3DVertexShaderImpl_FakeSemantics,
643     IWIneD3DVertexShaderImpl_SetLocalConstantsF /* the "IWIne" capitalisation matches the function's definition in this file */
644 };