wined3d: Initial post pixelshader blending support. [attempt 2].
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24
25 #include "config.h"
26
27 #include <math.h>
28 #include <stdio.h>
29
30 #include "wined3d_private.h"
31
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
33
34 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
35
36 /* Shader debugging - Change the following line to enable debugging of software
37       vertex shaders                                                             */
38 #if 0 /* Must not be 1 in cvs version */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
41 #else
42 # define VSTRACE(A)
43 # define TRACE_VSVECTOR(name)
44 #endif
45
46 /**
47  * NVIDIA: DX8 Vertex Shader to NV Vertex Program
48  *  http://developer.nvidia.com/view.asp?IO=vstovp
49  *
50  * NVIDIA: Memory Management with VAR
51  *  http://developer.nvidia.com/view.asp?IO=var_memory_management
52  */
53
54 /* TODO: Vertex and pixel shaders are almost identical; the only exceptions are the way some of the data is looked up and the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders.
55 Because of this, the bulk of the software pipeline can be shared between pixel and vertex shaders, and it wouldn't surprise me if the programs could be cross-compiled using a large body of shared code. */
56
57 #define GLNAME_REQUIRE_GLSL  ((const char *)1)
58
59 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
60     /* This table is not order or position dependent. */
61
62     /* Arithmetic */
63     {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, vshader_hw_map2gl,   NULL, 0, 0},
64     {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, vshader_hw_map2gl,   shader_glsl_mov, 0, 0},
65     {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, vshader_hw_map2gl,   shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
66     {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
67     {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
68     {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, vshader_hw_map2gl,   shader_glsl_mad, 0, 0},
69     {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
70     {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
71     {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rsq, 0, 0},
72     {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
73     {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
74     {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
75     {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
76     {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
77     {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
78     {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
79     {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
80     {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
81     {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, vshader_hw_map2gl,   shader_glsl_expp, 0, 0},
82     {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
83     {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, vshader_hw_map2gl,   shader_glsl_lit, 0, 0},
84     {WINED3DSIO_DST,    "dst",  "DST", 1, 3, vshader_hw_map2gl,   shader_glsl_dst, 0, 0},
85     {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
86     {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
87     {WINED3DSIO_POW,    "pow",  "POW", 1, 3, vshader_hw_map2gl,   shader_glsl_pow, 0, 0},
88     {WINED3DSIO_CRS,    "crs",  "XPD", 1, 3, vshader_hw_map2gl,   shader_glsl_cross, 0, 0},
89     /* TODO: sng can possibly be performed a  s
90         RCP tmp, vec
91         MUL out, tmp, vec*/
92     {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
93     {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
94     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
95     {WINED3DSIO_SINCOS, "sincos",  "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
96     /* Matrix */
97     {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
98     {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
99     {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
100     {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
101     {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
102     /* Declare registers */
103     {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
104     /* Constant definitions */
105     {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
106     {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
107     {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
108     /* Flow control - requires GLSL or software shaders */
109     {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
110     {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
111     {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
112     {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
113     {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
114     {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
115     {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
116     {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
117     {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
118     {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
119     {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
120     {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
121     {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
122     {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
123     {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},
124
125     {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
126     {WINED3DSIO_TEXLDL, "texldl",   NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
127     {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
128 };
129
130 static void vshader_set_limits(
131       IWineD3DVertexShaderImpl *This) {
132
133       This->baseShader.limits.texcoord = 0;
134       This->baseShader.limits.attributes = 16;
135       This->baseShader.limits.packed_input = 0;
136
137       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
138       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
139
140       switch (This->baseShader.hex_version) {
141           case WINED3DVS_VERSION(1,0):
142           case WINED3DVS_VERSION(1,1):
143                    This->baseShader.limits.temporary = 12;
144                    This->baseShader.limits.constant_bool = 0;
145                    This->baseShader.limits.constant_int = 0;
146                    This->baseShader.limits.address = 1;
147                    This->baseShader.limits.packed_output = 0;
148                    This->baseShader.limits.sampler = 0;
149                    This->baseShader.limits.label = 0;
150                    break;
151       
152           case WINED3DVS_VERSION(2,0):
153           case WINED3DVS_VERSION(2,1):
154                    This->baseShader.limits.temporary = 12;
155                    This->baseShader.limits.constant_bool = 16;
156                    This->baseShader.limits.constant_int = 16;
157                    This->baseShader.limits.address = 1;
158                    This->baseShader.limits.packed_output = 0;
159                    This->baseShader.limits.sampler = 0;
160                    This->baseShader.limits.label = 16;
161                    break;
162
163           case WINED3DVS_VERSION(3,0):
164                    This->baseShader.limits.temporary = 32;
165                    This->baseShader.limits.constant_bool = 32;
166                    This->baseShader.limits.constant_int = 32;
167                    This->baseShader.limits.address = 1;
168                    This->baseShader.limits.packed_output = 12;
169                    This->baseShader.limits.sampler = 4;
170                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
171                    break;
172
173           default: This->baseShader.limits.temporary = 12;
174                    This->baseShader.limits.constant_bool = 16;
175                    This->baseShader.limits.constant_int = 16;
176                    This->baseShader.limits.address = 1;
177                    This->baseShader.limits.packed_output = 0;
178                    This->baseShader.limits.sampler = 0;
179                    This->baseShader.limits.label = 16;
180                    FIXME("Unrecognized vertex shader version %#x\n",
181                        This->baseShader.hex_version);
182       }
183 }
184
185 /* This is an internal function,
186  * used to create fake semantics for shaders
187  * that don't have them - d3d8 shaders where the declaration
188  * stores the register for each input
189  */
190 static void vshader_set_input(
191     IWineD3DVertexShaderImpl* This,
192     unsigned int regnum,
193     BYTE usage, BYTE usage_idx) {
194
195     /* Fake usage: set reserved bit, usage, usage_idx */
196     DWORD usage_token = (0x1 << 31) |
197         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
198
199     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
200     DWORD reg_token = (0x1 << 31) |
201         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
202
203     This->semantics_in[regnum].usage = usage_token;
204     This->semantics_in[regnum].reg = reg_token;
205 }
206
207 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
208     if (usage_idx1 != usage_idx2) return FALSE;
209     if (usage1 == usage2) return TRUE;
210     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
211     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
212
213     return FALSE;
214 }
215
216 BOOL vshader_get_input(
217     IWineD3DVertexShader* iface,
218     BYTE usage_req, BYTE usage_idx_req,
219     unsigned int* regnum) {
220
221     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
222     int i;
223
224     for (i = 0; i < MAX_ATTRIBS; i++) {
225         DWORD usage_token = This->semantics_in[i].usage;
226         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
227         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
228
229         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
230             *regnum = i;
231             return TRUE;
232         }
233     }
234     return FALSE;
235 }
236
237 BOOL vshader_input_is_color(
238     IWineD3DVertexShader* iface,
239     unsigned int regnum) {
240
241     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
242
243     DWORD usage_token = This->semantics_in[regnum].usage;
244     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
245     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
246
247     int i;
248
249     for(i = 0; i < This->num_swizzled_attribs; i++) {
250         if(This->swizzled_attribs[i].usage == usage &&
251            This->swizzled_attribs[i].idx == usage_idx) {
252             return TRUE;
253         }
254     }
255     return FALSE;
256 }
257
258 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
259     UINT num = 0, i, j;
260     UINT numoldswizzles = This->num_swizzled_attribs;
261     IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
262
263     DWORD usage_token, usage, usage_idx;
264     BOOL found;
265
266     attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
267
268     /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
269     memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
270
271     memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
272
273     for(i = 0; i < decl->num_swizzled_attribs; i++) {
274         for(j = 0; j < MAX_ATTRIBS; j++) {
275
276             if(!This->baseShader.reg_maps.attributes[j]) continue;
277
278             usage_token = This->semantics_in[j].usage;
279             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
280             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
281
282             if(decl->swizzled_attribs[i].usage == usage &&
283                decl->swizzled_attribs[i].idx == usage_idx) {
284                 This->swizzled_attribs[num].usage = usage;
285                 This->swizzled_attribs[num].idx = usage_idx;
286                 num++;
287             }
288         }
289     }
290
291     /* Add previously converted attributes back in if they are not defined in the current declaration */
292     for(i = 0; i < numoldswizzles; i++) {
293
294         found = FALSE;
295         for(j = 0; j < decl->declarationWNumElements; j++) {
296             if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
297                oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
298                 found = TRUE;
299             }
300         }
301         if(found) {
302             /* This previously converted attribute is declared in the current declaration. Either it is
303              * already in the new array, or it should not be there. Skip it
304              */
305             continue;
306         }
307         /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
308          * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
309          * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
310          * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
311          * stays unswizzled as well because it isn't found in the oldswizzles array
312          */
313         for(j = 0; j < num; j++) {
314             if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
315                oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
316                oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
317                 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
318                          sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
319                 break;
320             }
321         }
322         This->swizzled_attribs[j].usage = oldswizzles[i].usage;
323         This->swizzled_attribs[j].idx = oldswizzles[i].idx;
324         num++;
325     }
326
327     TRACE("New swizzled attributes array\n");
328     for(i = 0; i < num; i++) {
329         TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
330               This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
331     }
332     This->num_swizzled_attribs = num;
333 }
334 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
335     or GLSL and send it to the card */
336 static VOID IWineD3DVertexShaderImpl_GenerateShader(
337     IWineD3DVertexShader *iface,
338     shader_reg_maps* reg_maps,
339     CONST DWORD *pFunction) {
340
341     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
342     IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
343     SHADER_BUFFER buffer;
344
345     find_swizzled_attribs(decl, This);
346
347 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
348         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
349     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
350         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
351         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
352         This->fixupVertexBufferSize = PGMSIZE;
353         This->fixupVertexBuffer[0] = 0;
354     }
355     buffer.buffer = This->device->fixupVertexBuffer;
356 #else
357     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
358 #endif
359     buffer.bsize = 0;
360     buffer.lineNo = 0;
361     buffer.newline = TRUE;
362
363     ((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_vshader(iface, &buffer);
364
365 #if 1 /* if were using the data buffer of device then we don't need to free it */
366   HeapFree(GetProcessHeap(), 0, buffer.buffer);
367 #endif
368 }
369
370 /* *******************************************
371    IWineD3DVertexShader IUnknown parts follow
372    ******************************************* */
373 static HRESULT  WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
374     return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
375 }
376
377 static ULONG  WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
378     return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
379 }
380
381 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
382     return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
383 }
384
385 /* *******************************************
386    IWineD3DVertexShader IWineD3DVertexShader parts follow
387    ******************************************* */
388
389 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
390     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
391     
392     *parent = This->parent;
393     IUnknown_AddRef(*parent);
394     TRACE("(%p) : returning %p\n", This, *parent);
395     return WINED3D_OK;
396 }
397
398 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
399     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
400     IWineD3DDevice_AddRef(This->baseShader.device);
401     *pDevice = This->baseShader.device;
402     TRACE("(%p) returning %p\n", This, *pDevice);
403     return WINED3D_OK;
404 }
405
406 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
407     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
408     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
409
410     if (NULL == pData) {
411         *pSizeOfData = This->baseShader.functionLength;
412         return WINED3D_OK;
413     }
414     if (*pSizeOfData < This->baseShader.functionLength) {
415         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
416          * than the required size we should write the required size and
417          * return D3DERR_MOREDATA. That's not actually true. */
418         return WINED3DERR_INVALIDCALL;
419     }
420     if (NULL == This->baseShader.function) { /* no function defined */
421         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
422         (*(DWORD **) pData) = NULL;
423     } else {
424         if(This->baseShader.functionLength == 0){
425
426         }
427         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
428         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
429     }
430     return WINED3D_OK;
431 }
432
433 /* Note that for vertex shaders CompileShader isn't called until the
434  * shader is first used. The reason for this is that we need the vertex
435  * declaration the shader will be used with in order to determine if
436  * the data in a register is of type D3DCOLOR, and needs swizzling. */
437 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
438
439     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
440     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
441     HRESULT hr;
442     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
443
444     TRACE("(%p) : pFunction %p\n", iface, pFunction);
445
446     /* First pass: trace shader */
447     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
448     vshader_set_limits(This);
449
450     /* Initialize immediate constant lists */
451     list_init(&This->baseShader.constantsF);
452     list_init(&This->baseShader.constantsB);
453     list_init(&This->baseShader.constantsI);
454
455     /* Second pass: figure out registers used, semantics, etc.. */
456     This->min_rel_offset = GL_LIMITS(vshader_constantsF);
457     This->max_rel_offset = 0;
458     memset(reg_maps, 0, sizeof(shader_reg_maps));
459     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
460        This->semantics_in, This->semantics_out, pFunction, NULL);
461     if (hr != WINED3D_OK) return hr;
462
463     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
464
465     if(deviceImpl->vs_selected_mode == SHADER_ARB &&
466        (GLINFO_LOCATION).arb_vs_offset_limit      &&
467        This->min_rel_offset <= This->max_rel_offset) {
468
469         if(This->max_rel_offset - This->min_rel_offset > 127) {
470             FIXME("The difference between the minimum and maximum relative offset is > 127\n");
471             FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
472             FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
473         } else if(This->max_rel_offset - This->min_rel_offset > 63) {
474             This->rel_offset = This->min_rel_offset + 63;
475         } else if(This->max_rel_offset > 63) {
476             This->rel_offset = This->min_rel_offset;
477         } else {
478             This->rel_offset = 0;
479         }
480     }
481     This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
482
483     /* copy the function ... because it will certainly be released by application */
484     if (NULL != pFunction) {
485         void *function;
486
487         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
488         if (!function) return E_OUTOFMEMORY;
489         memcpy(function, pFunction, This->baseShader.functionLength);
490         This->baseShader.function = function;
491     } else {
492         This->baseShader.function = NULL;
493     }
494
495     return WINED3D_OK;
496 }
497
498 /* Preload semantics for d3d8 shaders */
499 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
500     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
501     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
502
503     int i;
504     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
505         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
506         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
507     }
508 }
509
510 /* Set local constants for d3d8 shaders */
511 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
512         UINT start_idx, const float *src_data, UINT count) {
513     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
514     UINT i, end_idx;
515
516     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
517
518     end_idx = start_idx + count;
519     if (end_idx > GL_LIMITS(vshader_constantsF)) {
520         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
521         end_idx = GL_LIMITS(vshader_constantsF);
522     }
523
524     for (i = start_idx; i < end_idx; ++i) {
525         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
526         if (!lconst) return E_OUTOFMEMORY;
527
528         lconst->idx = i;
529         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
530         list_add_head(&This->baseShader.constantsF, &lconst->entry);
531     }
532
533     return WINED3D_OK;
534 }
535
536 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
537     UINT i, j, k;
538     BOOL found;
539
540     DWORD usage_token;
541     DWORD usage;
542     DWORD usage_idx;
543
544     for(i = 0; i < vdecl->declarationWNumElements; i++) {
545         /* Ignore tesselated streams and the termination entry(position0, stream 255, unused) */
546         if(vdecl->pDeclarationWine[i].Stream >= MAX_STREAMS ||
547            vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_UNUSED) continue;
548
549         for(j = 0; j < MAX_ATTRIBS; j++) {
550             if(!This->baseShader.reg_maps.attributes[j]) continue;
551
552             usage_token = This->semantics_in[j].usage;
553             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
554             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
555
556             if(vdecl->pDeclarationWine[i].Usage != usage ||
557                vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
558                 continue;
559             }
560
561             found = FALSE;
562             for(k = 0; k < This->num_swizzled_attribs; k++) {
563                 if(This->swizzled_attribs[k].usage == usage &&
564                     This->swizzled_attribs[k].idx == usage_idx) {
565                     found = TRUE;
566                 }
567             }
568             if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
569                 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
570                       debug_d3ddeclusage(usage), usage_idx);
571                 return TRUE;
572             }
573             if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
574                 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
575                       debug_d3ddeclusage(usage), usage_idx);
576                 return TRUE;
577             }
578         }
579     }
580     return FALSE;
581 }
582
583 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
584     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
585     IWineD3DVertexDeclarationImpl *vdecl;
586     CONST DWORD *function = This->baseShader.function;
587     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
588
589     TRACE("(%p) : function %p\n", iface, function);
590
591     /* We're already compiled. */
592     if (This->baseShader.is_compiled) {
593         vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
594
595         if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
596            memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
597
598             /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
599              * we have to recompile, but we have to take a deeper look at see if the attribs that differ
600              * are declared in the decl and used in the shader
601              */
602             if(swizzled_attribs_differ(This, vdecl)) {
603                 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
604                 goto recompile;
605             }
606             WARN("Swizzled attribute validation required an expensive comparison\n");
607         }
608
609         return WINED3D_OK;
610
611         recompile:
612         if(This->recompile_count < 50) {
613             This->recompile_count++;
614         } else {
615             FIXME("Vertexshader %p recompiled more than 50 times\n", This);
616         }
617
618         deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
619     }
620
621     /* We don't need to compile */
622     if (!function) {
623         This->baseShader.is_compiled = TRUE;
624         return WINED3D_OK;
625     }
626
627     /* Generate the HW shader */
628     TRACE("(%p) : Generating hardware program\n", This);
629     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
630
631     This->baseShader.is_compiled = TRUE;
632
633     return WINED3D_OK;
634 }
635
636 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
637 {
638     /*** IUnknown methods ***/
639     IWineD3DVertexShaderImpl_QueryInterface,
640     IWineD3DVertexShaderImpl_AddRef,
641     IWineD3DVertexShaderImpl_Release,
642     /*** IWineD3DBase methods ***/
643     IWineD3DVertexShaderImpl_GetParent,
644     /*** IWineD3DBaseShader methods ***/
645     IWineD3DVertexShaderImpl_SetFunction,
646     IWineD3DVertexShaderImpl_CompileShader,
647     /*** IWineD3DVertexShader methods ***/
648     IWineD3DVertexShaderImpl_GetDevice,
649     IWineD3DVertexShaderImpl_GetFunction,
650     IWineD3DVertexShaderImpl_FakeSemantics,
651     IWIneD3DVertexShaderImpl_SetLocalConstantsF
652 };