wined3d: Merge pshader_hw_map2gl() and vshader_hw_map2gl().
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with this library; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24  */
25
26 #include "config.h"
27
28 #include <math.h>
29 #include <stdio.h>
30
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34
35 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
36
/* TODO: Vertex and pixel shaders are almost identical. The only differences are
 * the way some of the data is looked up and which data is available, i.e. some
 * instructions are only valid for pixel shaders and some only for vertex
 * shaders. Because of this, the bulk of the software pipeline can be shared
 * between pixel and vertex shaders, and it would not be surprising if the
 * program could be cross-compiled using a large body of shared code. */
39
40 #define GLNAME_REQUIRE_GLSL  ((const char *)1)
41
42 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
43     /* This table is not order or position dependent. */
44
45     /* Arithmetic */
46     {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, shader_hw_map2gl,    NULL, 0, 0},
47     {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, shader_hw_map2gl,    shader_glsl_mov, 0, 0},
48     {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, shader_hw_map2gl,    shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
49     {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, shader_hw_map2gl,    shader_glsl_arith, 0, 0},
50     {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, shader_hw_map2gl,    shader_glsl_arith, 0, 0},
51     {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, shader_hw_map2gl,    shader_glsl_mad, 0, 0},
52     {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, shader_hw_map2gl,    shader_glsl_arith, 0, 0},
53     {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
54     {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rsq, 0, 0},
55     {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, shader_hw_map2gl,    shader_glsl_dot, 0, 0},
56     {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, shader_hw_map2gl,    shader_glsl_dot, 0, 0},
57     {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, shader_hw_map2gl,    shader_glsl_map2gl, 0, 0},
58     {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, shader_hw_map2gl,    shader_glsl_map2gl, 0, 0},
59     {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, shader_hw_map2gl,    shader_glsl_compare, 0, 0},
60     {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, shader_hw_map2gl,    shader_glsl_compare, 0, 0},
61     {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, shader_hw_map2gl,    shader_glsl_map2gl, 0, 0},
62     {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, shader_hw_map2gl,    shader_glsl_map2gl, 0, 0},
63     {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, shader_hw_map2gl,    shader_glsl_map2gl, 0, 0},
64     {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, shader_hw_map2gl,    shader_glsl_expp, 0, 0},
65     {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, shader_hw_map2gl,    shader_glsl_map2gl, 0, 0},
66     {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, shader_hw_map2gl,    shader_glsl_lit, 0, 0},
67     {WINED3DSIO_DST,    "dst",  "DST", 1, 3, shader_hw_map2gl,    shader_glsl_dst, 0, 0},
68     {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
69     {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, shader_hw_map2gl,    shader_glsl_map2gl, 0, 0},
70     {WINED3DSIO_POW,    "pow",  "POW", 1, 3, shader_hw_map2gl,    shader_glsl_pow, 0, 0},
71     {WINED3DSIO_CRS,    "crs",  "XPD", 1, 3, shader_hw_map2gl,    shader_glsl_cross, 0, 0},
72     /* TODO: sng can possibly be performed a  s
73         RCP tmp, vec
74         MUL out, tmp, vec*/
75     {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
76     {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
77     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
78     {WINED3DSIO_SINCOS, "sincos",  "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
79     /* Matrix */
80     {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
81     {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
82     {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
83     {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
84     {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
85     /* Declare registers */
86     {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
87     /* Constant definitions */
88     {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
89     {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
90     {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
91     /* Flow control - requires GLSL or software shaders */
92     {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
93     {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
94     {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
95     {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
96     {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
97     {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
98     {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
99     {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
100     {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
101     {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
102     {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
103     {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
104     {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
105     {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
106     {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},
107
108     {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
109     {WINED3DSIO_TEXLDL, "texldl",   NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
110     {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
111 };
112
113 static void vshader_set_limits(
114       IWineD3DVertexShaderImpl *This) {
115
116       This->baseShader.limits.texcoord = 0;
117       This->baseShader.limits.attributes = 16;
118       This->baseShader.limits.packed_input = 0;
119
120       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
121       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
122
123       switch (This->baseShader.hex_version) {
124           case WINED3DVS_VERSION(1,0):
125           case WINED3DVS_VERSION(1,1):
126                    This->baseShader.limits.temporary = 12;
127                    This->baseShader.limits.constant_bool = 0;
128                    This->baseShader.limits.constant_int = 0;
129                    This->baseShader.limits.address = 1;
130                    This->baseShader.limits.packed_output = 0;
131                    This->baseShader.limits.sampler = 0;
132                    This->baseShader.limits.label = 0;
133                    break;
134       
135           case WINED3DVS_VERSION(2,0):
136           case WINED3DVS_VERSION(2,1):
137                    This->baseShader.limits.temporary = 12;
138                    This->baseShader.limits.constant_bool = 16;
139                    This->baseShader.limits.constant_int = 16;
140                    This->baseShader.limits.address = 1;
141                    This->baseShader.limits.packed_output = 0;
142                    This->baseShader.limits.sampler = 0;
143                    This->baseShader.limits.label = 16;
144                    break;
145
146           case WINED3DVS_VERSION(3,0):
147                    This->baseShader.limits.temporary = 32;
148                    This->baseShader.limits.constant_bool = 32;
149                    This->baseShader.limits.constant_int = 32;
150                    This->baseShader.limits.address = 1;
151                    This->baseShader.limits.packed_output = 12;
152                    This->baseShader.limits.sampler = 4;
153                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
154                    break;
155
156           default: This->baseShader.limits.temporary = 12;
157                    This->baseShader.limits.constant_bool = 16;
158                    This->baseShader.limits.constant_int = 16;
159                    This->baseShader.limits.address = 1;
160                    This->baseShader.limits.packed_output = 0;
161                    This->baseShader.limits.sampler = 0;
162                    This->baseShader.limits.label = 16;
163                    FIXME("Unrecognized vertex shader version %#x\n",
164                        This->baseShader.hex_version);
165       }
166 }
167
168 /* This is an internal function,
169  * used to create fake semantics for shaders
170  * that don't have them - d3d8 shaders where the declaration
171  * stores the register for each input
172  */
173 static void vshader_set_input(
174     IWineD3DVertexShaderImpl* This,
175     unsigned int regnum,
176     BYTE usage, BYTE usage_idx) {
177
178     /* Fake usage: set reserved bit, usage, usage_idx */
179     DWORD usage_token = (0x1 << 31) |
180         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
181
182     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
183     DWORD reg_token = (0x1 << 31) |
184         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
185
186     This->semantics_in[regnum].usage = usage_token;
187     This->semantics_in[regnum].reg = reg_token;
188 }
189
190 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
191     if (usage_idx1 != usage_idx2) return FALSE;
192     if (usage1 == usage2) return TRUE;
193     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
194     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
195
196     return FALSE;
197 }
198
199 BOOL vshader_get_input(
200     IWineD3DVertexShader* iface,
201     BYTE usage_req, BYTE usage_idx_req,
202     unsigned int* regnum) {
203
204     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
205     int i;
206
207     for (i = 0; i < MAX_ATTRIBS; i++) {
208         DWORD usage_token = This->semantics_in[i].usage;
209         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
210         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
211
212         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
213             *regnum = i;
214             return TRUE;
215         }
216     }
217     return FALSE;
218 }
219
220 BOOL vshader_input_is_color(
221     IWineD3DVertexShader* iface,
222     unsigned int regnum) {
223
224     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
225
226     DWORD usage_token = This->semantics_in[regnum].usage;
227     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
228     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
229
230     int i;
231
232     for(i = 0; i < This->num_swizzled_attribs; i++) {
233         if(This->swizzled_attribs[i].usage == usage &&
234            This->swizzled_attribs[i].idx == usage_idx) {
235             return TRUE;
236         }
237     }
238     return FALSE;
239 }
240
241 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
242     UINT num = 0, i, j;
243     UINT numoldswizzles = This->num_swizzled_attribs;
244     IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
245
246     DWORD usage_token, usage, usage_idx;
247     BOOL found;
248
249     attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
250
251     /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
252     memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
253
254     memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
255
256     for(i = 0; i < decl->num_swizzled_attribs; i++) {
257         for(j = 0; j < MAX_ATTRIBS; j++) {
258
259             if(!This->baseShader.reg_maps.attributes[j]) continue;
260
261             usage_token = This->semantics_in[j].usage;
262             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
263             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
264
265             if(decl->swizzled_attribs[i].usage == usage &&
266                decl->swizzled_attribs[i].idx == usage_idx) {
267                 This->swizzled_attribs[num].usage = usage;
268                 This->swizzled_attribs[num].idx = usage_idx;
269                 num++;
270             }
271         }
272     }
273
274     /* Add previously converted attributes back in if they are not defined in the current declaration */
275     for(i = 0; i < numoldswizzles; i++) {
276
277         found = FALSE;
278         for(j = 0; j < decl->declarationWNumElements; j++) {
279             if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
280                oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
281                 found = TRUE;
282             }
283         }
284         if(found) {
285             /* This previously converted attribute is declared in the current declaration. Either it is
286              * already in the new array, or it should not be there. Skip it
287              */
288             continue;
289         }
290         /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
291          * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
292          * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
293          * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
294          * stays unswizzled as well because it isn't found in the oldswizzles array
295          */
296         for(j = 0; j < num; j++) {
297             if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
298                oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
299                oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
300                 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
301                          sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
302                 break;
303             }
304         }
305         This->swizzled_attribs[j].usage = oldswizzles[i].usage;
306         This->swizzled_attribs[j].idx = oldswizzles[i].idx;
307         num++;
308     }
309
310     TRACE("New swizzled attributes array\n");
311     for(i = 0; i < num; i++) {
312         TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
313               This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
314     }
315     This->num_swizzled_attribs = num;
316 }
317 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
318     or GLSL and send it to the card */
319 static VOID IWineD3DVertexShaderImpl_GenerateShader(
320     IWineD3DVertexShader *iface,
321     shader_reg_maps* reg_maps,
322     CONST DWORD *pFunction) {
323
324     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
325     IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
326     SHADER_BUFFER buffer;
327
328     find_swizzled_attribs(decl, This);
329
330 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
331         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
332     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
333         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
334         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
335         This->fixupVertexBufferSize = PGMSIZE;
336         This->fixupVertexBuffer[0] = 0;
337     }
338     buffer.buffer = This->device->fixupVertexBuffer;
339 #else
340     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
341 #endif
342     buffer.bsize = 0;
343     buffer.lineNo = 0;
344     buffer.newline = TRUE;
345
346     ((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_vshader(iface, &buffer);
347
348 #if 1 /* if were using the data buffer of device then we don't need to free it */
349   HeapFree(GetProcessHeap(), 0, buffer.buffer);
350 #endif
351 }
352
353 /* *******************************************
354    IWineD3DVertexShader IUnknown parts follow
355    ******************************************* */
356 static HRESULT  WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
357     return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
358 }
359
360 static ULONG  WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
361     return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
362 }
363
364 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
365     return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
366 }
367
368 /* *******************************************
369    IWineD3DVertexShader IWineD3DVertexShader parts follow
370    ******************************************* */
371
372 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
373     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
374     
375     *parent = This->parent;
376     IUnknown_AddRef(*parent);
377     TRACE("(%p) : returning %p\n", This, *parent);
378     return WINED3D_OK;
379 }
380
381 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
382     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
383     IWineD3DDevice_AddRef(This->baseShader.device);
384     *pDevice = This->baseShader.device;
385     TRACE("(%p) returning %p\n", This, *pDevice);
386     return WINED3D_OK;
387 }
388
389 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
390     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
391     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
392
393     if (NULL == pData) {
394         *pSizeOfData = This->baseShader.functionLength;
395         return WINED3D_OK;
396     }
397     if (*pSizeOfData < This->baseShader.functionLength) {
398         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
399          * than the required size we should write the required size and
400          * return D3DERR_MOREDATA. That's not actually true. */
401         return WINED3DERR_INVALIDCALL;
402     }
403     if (NULL == This->baseShader.function) { /* no function defined */
404         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
405         (*(DWORD **) pData) = NULL;
406     } else {
407         if(This->baseShader.functionLength == 0){
408
409         }
410         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
411         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
412     }
413     return WINED3D_OK;
414 }
415
416 /* Note that for vertex shaders CompileShader isn't called until the
417  * shader is first used. The reason for this is that we need the vertex
418  * declaration the shader will be used with in order to determine if
419  * the data in a register is of type D3DCOLOR, and needs swizzling. */
420 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
421
422     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
423     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
424     HRESULT hr;
425     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
426
427     TRACE("(%p) : pFunction %p\n", iface, pFunction);
428
429     /* First pass: trace shader */
430     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
431     vshader_set_limits(This);
432
433     /* Initialize immediate constant lists */
434     list_init(&This->baseShader.constantsF);
435     list_init(&This->baseShader.constantsB);
436     list_init(&This->baseShader.constantsI);
437
438     /* Second pass: figure out registers used, semantics, etc.. */
439     This->min_rel_offset = GL_LIMITS(vshader_constantsF);
440     This->max_rel_offset = 0;
441     memset(reg_maps, 0, sizeof(shader_reg_maps));
442     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
443        This->semantics_in, This->semantics_out, pFunction, NULL);
444     if (hr != WINED3D_OK) return hr;
445
446     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
447
448     if(deviceImpl->vs_selected_mode == SHADER_ARB &&
449        (GLINFO_LOCATION).arb_vs_offset_limit      &&
450        This->min_rel_offset <= This->max_rel_offset) {
451
452         if(This->max_rel_offset - This->min_rel_offset > 127) {
453             FIXME("The difference between the minimum and maximum relative offset is > 127\n");
454             FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
455             FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
456         } else if(This->max_rel_offset - This->min_rel_offset > 63) {
457             This->rel_offset = This->min_rel_offset + 63;
458         } else if(This->max_rel_offset > 63) {
459             This->rel_offset = This->min_rel_offset;
460         } else {
461             This->rel_offset = 0;
462         }
463     }
464     This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
465
466     /* copy the function ... because it will certainly be released by application */
467     if (NULL != pFunction) {
468         void *function;
469
470         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
471         if (!function) return E_OUTOFMEMORY;
472         memcpy(function, pFunction, This->baseShader.functionLength);
473         This->baseShader.function = function;
474     } else {
475         This->baseShader.function = NULL;
476     }
477
478     return WINED3D_OK;
479 }
480
481 /* Preload semantics for d3d8 shaders */
482 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
483     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
484     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
485
486     int i;
487     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
488         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
489         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
490     }
491 }
492
493 /* Set local constants for d3d8 shaders */
494 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
495         UINT start_idx, const float *src_data, UINT count) {
496     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
497     UINT i, end_idx;
498
499     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
500
501     end_idx = start_idx + count;
502     if (end_idx > GL_LIMITS(vshader_constantsF)) {
503         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
504         end_idx = GL_LIMITS(vshader_constantsF);
505     }
506
507     for (i = start_idx; i < end_idx; ++i) {
508         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
509         if (!lconst) return E_OUTOFMEMORY;
510
511         lconst->idx = i;
512         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
513         list_add_head(&This->baseShader.constantsF, &lconst->entry);
514     }
515
516     return WINED3D_OK;
517 }
518
519 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
520     UINT i, j, k;
521     BOOL found;
522
523     DWORD usage_token;
524     DWORD usage;
525     DWORD usage_idx;
526
527     for(i = 0; i < vdecl->declarationWNumElements; i++) {
528         /* Ignore tesselated streams and the termination entry(position0, stream 255, unused) */
529         if(vdecl->pDeclarationWine[i].Stream >= MAX_STREAMS ||
530            vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_UNUSED) continue;
531
532         for(j = 0; j < MAX_ATTRIBS; j++) {
533             if(!This->baseShader.reg_maps.attributes[j]) continue;
534
535             usage_token = This->semantics_in[j].usage;
536             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
537             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
538
539             if(vdecl->pDeclarationWine[i].Usage != usage ||
540                vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
541                 continue;
542             }
543
544             found = FALSE;
545             for(k = 0; k < This->num_swizzled_attribs; k++) {
546                 if(This->swizzled_attribs[k].usage == usage &&
547                     This->swizzled_attribs[k].idx == usage_idx) {
548                     found = TRUE;
549                 }
550             }
551             if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
552                 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
553                       debug_d3ddeclusage(usage), usage_idx);
554                 return TRUE;
555             }
556             if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
557                 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
558                       debug_d3ddeclusage(usage), usage_idx);
559                 return TRUE;
560             }
561         }
562     }
563     return FALSE;
564 }
565
566 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
567     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
568     IWineD3DVertexDeclarationImpl *vdecl;
569     CONST DWORD *function = This->baseShader.function;
570     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
571
572     TRACE("(%p) : function %p\n", iface, function);
573
574     /* We're already compiled. */
575     if (This->baseShader.is_compiled) {
576         vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
577
578         if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
579            memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
580
581             /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
582              * we have to recompile, but we have to take a deeper look at see if the attribs that differ
583              * are declared in the decl and used in the shader
584              */
585             if(swizzled_attribs_differ(This, vdecl)) {
586                 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
587                 goto recompile;
588             }
589             WARN("Swizzled attribute validation required an expensive comparison\n");
590         }
591
592         return WINED3D_OK;
593
594         recompile:
595         if(This->recompile_count < 50) {
596             This->recompile_count++;
597         } else {
598             FIXME("Vertexshader %p recompiled more than 50 times\n", This);
599         }
600
601         deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
602     }
603
604     /* We don't need to compile */
605     if (!function) {
606         This->baseShader.is_compiled = TRUE;
607         return WINED3D_OK;
608     }
609
610     /* Generate the HW shader */
611     TRACE("(%p) : Generating hardware program\n", This);
612     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
613
614     This->baseShader.is_compiled = TRUE;
615
616     return WINED3D_OK;
617 }
618
619 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
620 {
621     /*** IUnknown methods ***/
622     IWineD3DVertexShaderImpl_QueryInterface,
623     IWineD3DVertexShaderImpl_AddRef,
624     IWineD3DVertexShaderImpl_Release,
625     /*** IWineD3DBase methods ***/
626     IWineD3DVertexShaderImpl_GetParent,
627     /*** IWineD3DBaseShader methods ***/
628     IWineD3DVertexShaderImpl_SetFunction,
629     IWineD3DVertexShaderImpl_CompileShader,
630     /*** IWineD3DVertexShader methods ***/
631     IWineD3DVertexShaderImpl_GetDevice,
632     IWineD3DVertexShaderImpl_GetFunction,
633     IWineD3DVertexShaderImpl_FakeSemantics,
634     IWIneD3DVertexShaderImpl_SetLocalConstantsF
635 };