wined3d: Add d3d10 primitive types.
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with this library; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24  */
25
26 #include "config.h"
27
28 #include <math.h>
29 #include <stdio.h>
30
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34
35 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
36
/* TODO: Vertex and pixel shaders are almost identical. The only exceptions are the way
 * some of the data is looked up and the availability of some of the data, i.e. some
 * instructions are only valid for pshaders and some only for vshaders. Because of this,
 * the bulk of the software pipeline can be shared between pixel and vertex shaders, and
 * it wouldn't surprise me if the program could be cross compiled using a large body of
 * shared code. */
39
/* Opcode table for vertex shaders, terminated by an all-zero entry with a
 * NULL name.
 *
 * Each entry lists, in order: the WINED3DSIO_* opcode, its mnemonic, two
 * small integers, the WINED3DSIH_* handler index, and two version fields.
 * NOTE(review): the two integers look like "has a destination token" and
 * "total parameter count", and the version fields look like the minimum and
 * maximum shader version (0/0 apparently meaning "no restriction", -1 "no
 * upper bound") - confirm against the SHADER_OPCODE definition. */
CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
    /* This table is not order or position dependent. */

    /* Arithmetic */
    {WINED3DSIO_NOP,     "nop",     0, 0, WINED3DSIH_NOP,     0,                      0                     },
    {WINED3DSIO_MOV,     "mov",     1, 2, WINED3DSIH_MOV,     0,                      0                     },
    {WINED3DSIO_MOVA,    "mova",    1, 2, WINED3DSIH_MOVA,    WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_ADD,     "add",     1, 3, WINED3DSIH_ADD,     0,                      0                     },
    {WINED3DSIO_SUB,     "sub",     1, 3, WINED3DSIH_SUB,     0,                      0                     },
    {WINED3DSIO_MAD,     "mad",     1, 4, WINED3DSIH_MAD,     0,                      0                     },
    {WINED3DSIO_MUL,     "mul",     1, 3, WINED3DSIH_MUL,     0,                      0                     },
    {WINED3DSIO_RCP,     "rcp",     1, 2, WINED3DSIH_RCP,     0,                      0                     },
    {WINED3DSIO_RSQ,     "rsq",     1, 2, WINED3DSIH_RSQ,     0,                      0                     },
    {WINED3DSIO_DP3,     "dp3",     1, 3, WINED3DSIH_DP3,     0,                      0                     },
    {WINED3DSIO_DP4,     "dp4",     1, 3, WINED3DSIH_DP4,     0,                      0                     },
    {WINED3DSIO_MIN,     "min",     1, 3, WINED3DSIH_MIN,     0,                      0                     },
    {WINED3DSIO_MAX,     "max",     1, 3, WINED3DSIH_MAX,     0,                      0                     },
    {WINED3DSIO_SLT,     "slt",     1, 3, WINED3DSIH_SLT,     0,                      0                     },
    {WINED3DSIO_SGE,     "sge",     1, 3, WINED3DSIH_SGE,     0,                      0                     },
    {WINED3DSIO_ABS,     "abs",     1, 2, WINED3DSIH_ABS,     0,                      0                     },
    {WINED3DSIO_EXP,     "exp",     1, 2, WINED3DSIH_EXP,     0,                      0                     },
    {WINED3DSIO_LOG,     "log",     1, 2, WINED3DSIH_LOG,     0,                      0                     },
    {WINED3DSIO_EXPP,    "expp",    1, 2, WINED3DSIH_EXPP,    0,                      0                     },
    {WINED3DSIO_LOGP,    "logp",    1, 2, WINED3DSIH_LOGP,    0,                      0                     },
    {WINED3DSIO_LIT,     "lit",     1, 2, WINED3DSIH_LIT,     0,                      0                     },
    {WINED3DSIO_DST,     "dst",     1, 3, WINED3DSIH_DST,     0,                      0                     },
    {WINED3DSIO_LRP,     "lrp",     1, 4, WINED3DSIH_LRP,     0,                      0                     },
    {WINED3DSIO_FRC,     "frc",     1, 2, WINED3DSIH_FRC,     0,                      0                     },
    {WINED3DSIO_POW,     "pow",     1, 3, WINED3DSIH_POW,     0,                      0                     },
    {WINED3DSIO_CRS,     "crs",     1, 3, WINED3DSIH_CRS,     0,                      0                     },
    /* TODO: sgn can possibly be performed as
        RCP tmp, vec
        MUL out, tmp, vec*/
    {WINED3DSIO_SGN,     "sgn",     1, 2, WINED3DSIH_SGN,     0,                      0                     },
    {WINED3DSIO_NRM,     "nrm",     1, 2, WINED3DSIH_NRM,     0,                      0                     },
    /* sincos appears twice: a 4-parameter form for versions 2.0-2.1 and a
     * 2-parameter form from version 3.0 on. */
    {WINED3DSIO_SINCOS,  "sincos",  1, 4, WINED3DSIH_SINCOS,  WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
    {WINED3DSIO_SINCOS,  "sincos",  1, 2, WINED3DSIH_SINCOS,  WINED3DVS_VERSION(3,0), -1                    },
    /* Matrix */
    {WINED3DSIO_M4x4,    "m4x4",    1, 3, WINED3DSIH_M4x4,    0,                      0                     },
    {WINED3DSIO_M4x3,    "m4x3",    1, 3, WINED3DSIH_M4x3,    0,                      0                     },
    {WINED3DSIO_M3x4,    "m3x4",    1, 3, WINED3DSIH_M3x4,    0,                      0                     },
    {WINED3DSIO_M3x3,    "m3x3",    1, 3, WINED3DSIH_M3x3,    0,                      0                     },
    {WINED3DSIO_M3x2,    "m3x2",    1, 3, WINED3DSIH_M3x2,    0,                      0                     },
    /* Declare registers */
    {WINED3DSIO_DCL,     "dcl",     0, 2, WINED3DSIH_DCL,     0,                      0                     },
    /* Constant definitions */
    {WINED3DSIO_DEF,     "def",     1, 5, WINED3DSIH_DEF,     0,                      0                     },
    {WINED3DSIO_DEFB,    "defb",    1, 2, WINED3DSIH_DEFB,    0,                      0                     },
    {WINED3DSIO_DEFI,    "defi",    1, 5, WINED3DSIH_DEFI,    0,                      0                     },
    /* Flow control - requires GLSL or software shaders */
    {WINED3DSIO_REP ,    "rep",     0, 1, WINED3DSIH_REP,     WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_ENDREP,  "endrep",  0, 0, WINED3DSIH_ENDREP,  WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_IF,      "if",      0, 1, WINED3DSIH_IF,      WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_IFC,     "ifc",     0, 2, WINED3DSIH_IFC,     WINED3DVS_VERSION(2,1), -1                    },
    {WINED3DSIO_ELSE,    "else",    0, 0, WINED3DSIH_ELSE,    WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_ENDIF,   "endif",   0, 0, WINED3DSIH_ENDIF,   WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_BREAK,   "break",   0, 0, WINED3DSIH_BREAK,   WINED3DVS_VERSION(2,1), -1                    },
    {WINED3DSIO_BREAKC,  "breakc",  0, 2, WINED3DSIH_BREAKC,  WINED3DVS_VERSION(2,1), -1                    },
    {WINED3DSIO_BREAKP,  "breakp",  0, 1, WINED3DSIH_BREAKP,  0,                      0                     },
    {WINED3DSIO_CALL,    "call",    0, 1, WINED3DSIH_CALL,    WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_CALLNZ,  "callnz",  0, 2, WINED3DSIH_CALLNZ,  WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_LOOP,    "loop",    0, 2, WINED3DSIH_LOOP,    WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_RET,     "ret",     0, 0, WINED3DSIH_RET,     WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_ENDLOOP, "endloop", 0, 0, WINED3DSIH_ENDLOOP, WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_LABEL,   "label",   0, 1, WINED3DSIH_LABEL,   WINED3DVS_VERSION(2,0), -1                    },

    {WINED3DSIO_SETP,    "setp",    1, 3, WINED3DSIH_SETP,    0,                      0                     },
    {WINED3DSIO_TEXLDL,  "texldl",  1, 3, WINED3DSIH_TEXLDL,  WINED3DVS_VERSION(3,0), -1                    },
    /* End-of-table marker */
    {0,                  NULL,      0, 0, 0,                  0,                      0                     }
};
110
111 static void vshader_set_limits(
112       IWineD3DVertexShaderImpl *This) {
113
114       This->baseShader.limits.texcoord = 0;
115       This->baseShader.limits.attributes = 16;
116       This->baseShader.limits.packed_input = 0;
117
118       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
119       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
120
121       switch (This->baseShader.reg_maps.shader_version)
122       {
123           case WINED3DVS_VERSION(1,0):
124           case WINED3DVS_VERSION(1,1):
125                    This->baseShader.limits.temporary = 12;
126                    This->baseShader.limits.constant_bool = 0;
127                    This->baseShader.limits.constant_int = 0;
128                    This->baseShader.limits.address = 1;
129                    This->baseShader.limits.packed_output = 0;
130                    This->baseShader.limits.sampler = 0;
131                    This->baseShader.limits.label = 0;
132                    break;
133       
134           case WINED3DVS_VERSION(2,0):
135           case WINED3DVS_VERSION(2,1):
136                    This->baseShader.limits.temporary = 12;
137                    This->baseShader.limits.constant_bool = 16;
138                    This->baseShader.limits.constant_int = 16;
139                    This->baseShader.limits.address = 1;
140                    This->baseShader.limits.packed_output = 0;
141                    This->baseShader.limits.sampler = 0;
142                    This->baseShader.limits.label = 16;
143                    break;
144
145           case WINED3DVS_VERSION(3,0):
146                    This->baseShader.limits.temporary = 32;
147                    This->baseShader.limits.constant_bool = 32;
148                    This->baseShader.limits.constant_int = 32;
149                    This->baseShader.limits.address = 1;
150                    This->baseShader.limits.packed_output = 12;
151                    This->baseShader.limits.sampler = 4;
152                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
153                    break;
154
155           default: This->baseShader.limits.temporary = 12;
156                    This->baseShader.limits.constant_bool = 16;
157                    This->baseShader.limits.constant_int = 16;
158                    This->baseShader.limits.address = 1;
159                    This->baseShader.limits.packed_output = 0;
160                    This->baseShader.limits.sampler = 0;
161                    This->baseShader.limits.label = 16;
162                    FIXME("Unrecognized vertex shader version %#x\n",
163                            This->baseShader.reg_maps.shader_version);
164       }
165 }
166
167 /* This is an internal function,
168  * used to create fake semantics for shaders
169  * that don't have them - d3d8 shaders where the declaration
170  * stores the register for each input
171  */
172 static void vshader_set_input(
173     IWineD3DVertexShaderImpl* This,
174     unsigned int regnum,
175     BYTE usage, BYTE usage_idx) {
176
177     /* Fake usage: set reserved bit, usage, usage_idx */
178     DWORD usage_token = (0x1 << 31) |
179         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
180
181     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
182     DWORD reg_token = (0x1 << 31) |
183         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
184
185     This->semantics_in[regnum].usage = usage_token;
186     This->semantics_in[regnum].reg = reg_token;
187 }
188
189 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
190     if (usage_idx1 != usage_idx2) return FALSE;
191     if (usage1 == usage2) return TRUE;
192     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
193     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
194
195     return FALSE;
196 }
197
198 BOOL vshader_get_input(
199     IWineD3DVertexShader* iface,
200     BYTE usage_req, BYTE usage_idx_req,
201     unsigned int* regnum) {
202
203     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
204     int i;
205
206     for (i = 0; i < MAX_ATTRIBS; i++) {
207         DWORD usage_token = This->semantics_in[i].usage;
208         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
209         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
210
211         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
212             *regnum = i;
213             return TRUE;
214         }
215     }
216     return FALSE;
217 }
218
219 /* *******************************************
220    IWineD3DVertexShader IUnknown parts follow
221    ******************************************* */
222 static HRESULT  WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
223     TRACE("iface %p, riid %s, ppobj %p\n", iface, debugstr_guid(riid), ppobj);
224
225     if (IsEqualGUID(riid, &IID_IWineD3DVertexShader)
226             || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
227             || IsEqualGUID(riid, &IID_IWineD3DBase)
228             || IsEqualGUID(riid, &IID_IUnknown))
229     {
230         IUnknown_AddRef(iface);
231         *ppobj = iface;
232         return S_OK;
233     }
234
235     WARN("%s not implemented, returning E_NOINTERFACE\n", debugstr_guid(riid));
236
237     *ppobj = NULL;
238     return E_NOINTERFACE;
239 }
240
241 static ULONG  WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
242     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
243     ULONG refcount = InterlockedIncrement(&This->baseShader.ref);
244
245     TRACE("%p increasing refcount to %u\n", This, refcount);
246
247     return refcount;
248 }
249
250 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
251     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
252     ULONG refcount = InterlockedDecrement(&This->baseShader.ref);
253
254     TRACE("%p decreasing refcount to %u\n", This, refcount);
255
256     if (!refcount)
257     {
258         shader_cleanup((IWineD3DBaseShader *)iface);
259         HeapFree(GetProcessHeap(), 0, This);
260     }
261
262     return refcount;
263 }
264
265 /* *******************************************
266    IWineD3DVertexShader IWineD3DVertexShader parts follow
267    ******************************************* */
268
269 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
270     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
271     
272     *parent = This->parent;
273     IUnknown_AddRef(*parent);
274     TRACE("(%p) : returning %p\n", This, *parent);
275     return WINED3D_OK;
276 }
277
278 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
279     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
280     IWineD3DDevice_AddRef(This->baseShader.device);
281     *pDevice = This->baseShader.device;
282     TRACE("(%p) returning %p\n", This, *pDevice);
283     return WINED3D_OK;
284 }
285
286 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
287     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
288     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
289
290     if (NULL == pData) {
291         *pSizeOfData = This->baseShader.functionLength;
292         return WINED3D_OK;
293     }
294     if (*pSizeOfData < This->baseShader.functionLength) {
295         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
296          * than the required size we should write the required size and
297          * return D3DERR_MOREDATA. That's not actually true. */
298         return WINED3DERR_INVALIDCALL;
299     }
300
301     TRACE("(%p) : GetFunction copying to %p\n", This, pData);
302     memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
303
304     return WINED3D_OK;
305 }
306
307 /* Note that for vertex shaders CompileShader isn't called until the
308  * shader is first used. The reason for this is that we need the vertex
309  * declaration the shader will be used with in order to determine if
310  * the data in a register is of type D3DCOLOR, and needs swizzling. */
311 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
312
313     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
314     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
315     HRESULT hr;
316     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
317
318     TRACE("(%p) : pFunction %p\n", iface, pFunction);
319
320     /* First pass: trace shader */
321     if (TRACE_ON(d3d_shader)) shader_trace_init(pFunction, This->baseShader.shader_ins);
322
323     /* Initialize immediate constant lists */
324     list_init(&This->baseShader.constantsF);
325     list_init(&This->baseShader.constantsB);
326     list_init(&This->baseShader.constantsI);
327
328     /* Second pass: figure out registers used, semantics, etc.. */
329     This->min_rel_offset = GL_LIMITS(vshader_constantsF);
330     This->max_rel_offset = 0;
331     memset(reg_maps, 0, sizeof(shader_reg_maps));
332     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
333             This->semantics_in, This->semantics_out, pFunction);
334     if (hr != WINED3D_OK) return hr;
335
336     vshader_set_limits(This);
337
338     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
339
340     if(deviceImpl->vs_selected_mode == SHADER_ARB &&
341        (GLINFO_LOCATION).arb_vs_offset_limit      &&
342        This->min_rel_offset <= This->max_rel_offset) {
343
344         if(This->max_rel_offset - This->min_rel_offset > 127) {
345             FIXME("The difference between the minimum and maximum relative offset is > 127\n");
346             FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
347             FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
348         } else if(This->max_rel_offset - This->min_rel_offset > 63) {
349             This->rel_offset = This->min_rel_offset + 63;
350         } else if(This->max_rel_offset > 63) {
351             This->rel_offset = This->min_rel_offset;
352         } else {
353             This->rel_offset = 0;
354         }
355     }
356     This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
357
358     /* copy the function ... because it will certainly be released by application */
359     This->baseShader.function = HeapAlloc(GetProcessHeap(), 0, This->baseShader.functionLength);
360     if (!This->baseShader.function) return E_OUTOFMEMORY;
361     memcpy(This->baseShader.function, pFunction, This->baseShader.functionLength);
362
363     return WINED3D_OK;
364 }
365
366 /* Preload semantics for d3d8 shaders */
367 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
368     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
369     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
370
371     int i;
372     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
373         const WINED3DVERTEXELEMENT *element = vdecl->pDeclarationWine + i;
374         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
375     }
376 }
377
378 /* Set local constants for d3d8 shaders */
379 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
380         UINT start_idx, const float *src_data, UINT count) {
381     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
382     UINT i, end_idx;
383
384     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
385
386     end_idx = start_idx + count;
387     if (end_idx > GL_LIMITS(vshader_constantsF)) {
388         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
389         end_idx = GL_LIMITS(vshader_constantsF);
390     }
391
392     for (i = start_idx; i < end_idx; ++i) {
393         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
394         if (!lconst) return E_OUTOFMEMORY;
395
396         lconst->idx = i;
397         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
398         list_add_head(&This->baseShader.constantsF, &lconst->entry);
399     }
400
401     return WINED3D_OK;
402 }
403
404 static GLuint vertexshader_compile(IWineD3DVertexShaderImpl *This, const struct vs_compile_args *args) {
405     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
406     SHADER_BUFFER buffer;
407     GLuint ret;
408
409     /* Generate the HW shader */
410     TRACE("(%p) : Generating hardware program\n", This);
411     shader_buffer_init(&buffer);
412     This->cur_args = args;
413     ret = deviceImpl->shader_backend->shader_generate_vshader((IWineD3DVertexShader *)This, &buffer, args);
414     This->cur_args = NULL;
415     shader_buffer_free(&buffer);
416
417     return ret;
418 }
419
/* Function table for the IWineD3DVertexShader implementation in this file.
 * The initializer is positional, so the entries must stay in the order in
 * which the members of IWineD3DVertexShaderVtbl are declared. */
const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
{
    /*** IUnknown methods ***/
    IWineD3DVertexShaderImpl_QueryInterface,
    IWineD3DVertexShaderImpl_AddRef,
    IWineD3DVertexShaderImpl_Release,
    /*** IWineD3DBase methods ***/
    IWineD3DVertexShaderImpl_GetParent,
    /*** IWineD3DBaseShader methods ***/
    IWineD3DVertexShaderImpl_SetFunction,
    /*** IWineD3DVertexShader methods ***/
    IWineD3DVertexShaderImpl_GetDevice,
    IWineD3DVertexShaderImpl_GetFunction,
    IWineD3DVertexShaderImpl_FakeSemantics,
    IWIneD3DVertexShaderImpl_SetLocalConstantsF
};
436
437 void find_vs_compile_args(IWineD3DVertexShaderImpl *shader, IWineD3DStateBlockImpl *stateblock, struct vs_compile_args *args) {
438     args->fog_src = stateblock->renderState[WINED3DRS_FOGTABLEMODE] == WINED3DFOG_NONE ? VS_FOG_COORD : VS_FOG_Z;
439     args->swizzle_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.swizzle_map;
440 }
441
442 static inline BOOL vs_args_equal(const struct vs_compile_args *stored, const struct vs_compile_args *new,
443                                  const DWORD use_map) {
444     if((stored->swizzle_map & use_map) != new->swizzle_map) return FALSE;
445     return stored->fog_src == new->fog_src;
446 }
447
448 GLuint find_gl_vshader(IWineD3DVertexShaderImpl *shader, const struct vs_compile_args *args)
449 {
450     UINT i;
451     DWORD new_size = shader->shader_array_size;
452     struct vs_compiled_shader *new_array;
453     DWORD use_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.use_map;
454
455     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
456      * so a linear search is more performant than a hashmap or a binary search
457      * (cache coherency etc)
458      */
459     for(i = 0; i < shader->num_gl_shaders; i++) {
460         if(vs_args_equal(&shader->gl_shaders[i].args, args, use_map)) {
461             return shader->gl_shaders[i].prgId;
462         }
463     }
464
465     TRACE("No matching GL shader found, compiling a new shader\n");
466
467     if(shader->shader_array_size == shader->num_gl_shaders) {
468         if(shader->gl_shaders) {
469             new_size = shader->shader_array_size + max(1, shader->shader_array_size / 2);
470             new_array = HeapReAlloc(GetProcessHeap(), 0, shader->gl_shaders,
471                                     new_size * sizeof(*shader->gl_shaders));
472         } else {
473             new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader->gl_shaders));
474             new_size = 1;
475         }
476
477         if(!new_array) {
478             ERR("Out of memory\n");
479             return 0;
480         }
481         shader->gl_shaders = new_array;
482         shader->shader_array_size = new_size;
483     }
484
485     shader->gl_shaders[shader->num_gl_shaders].args = *args;
486     shader->gl_shaders[shader->num_gl_shaders].prgId = vertexshader_compile(shader, args);
487     return shader->gl_shaders[shader->num_gl_shaders++].prgId;
488 }