wined3d: Merge wined3d_private_types.h into wined3d_private.h.
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with this library; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24  */
25
26 #include "config.h"
27
28 #include <math.h>
29 #include <stdio.h>
30
31 #include "wined3d_private.h"
32
/* Debug channel used by the TRACE/WARN/FIXME/ERR calls in this file. */
WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);

/* Expands to the gl_info of the adapter belonging to the shader's device.
 * The expansion refers to a local variable named "This", so any function
 * using this macro must have an IWineD3DVertexShaderImpl pointer with
 * exactly that name in scope.
 * NOTE(review): GL_LIMITS() is presumably built on top of this macro -
 * confirm in wined3d_private.h. */
#define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
36
/* Vertex shader opcode table.
 *
 * This table is not order or position dependent.
 *
 * NOTE(review): SHADER_OPCODE is declared elsewhere; judging from the values
 * the columns appear to be: opcode, "has destination" flag, parameter count,
 * handler index, minimum shader version, maximum shader version - confirm
 * against the struct definition. SINCOS is listed twice with different
 * parameter counts and different version ranges. The last row (opcode 0,
 * WINED3DSIH_TABLE_SIZE) looks like the end-of-table marker. */
const SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] =
{
    /* Arithmetic */
    {WINED3DSIO_NOP,     0, 0, WINED3DSIH_NOP,        0,                      0                     },
    {WINED3DSIO_MOV,     1, 2, WINED3DSIH_MOV,        0,                      0                     },
    {WINED3DSIO_MOVA,    1, 2, WINED3DSIH_MOVA,       WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_ADD,     1, 3, WINED3DSIH_ADD,        0,                      0                     },
    {WINED3DSIO_SUB,     1, 3, WINED3DSIH_SUB,        0,                      0                     },
    {WINED3DSIO_MAD,     1, 4, WINED3DSIH_MAD,        0,                      0                     },
    {WINED3DSIO_MUL,     1, 3, WINED3DSIH_MUL,        0,                      0                     },
    {WINED3DSIO_RCP,     1, 2, WINED3DSIH_RCP,        0,                      0                     },
    {WINED3DSIO_RSQ,     1, 2, WINED3DSIH_RSQ,        0,                      0                     },
    {WINED3DSIO_DP3,     1, 3, WINED3DSIH_DP3,        0,                      0                     },
    {WINED3DSIO_DP4,     1, 3, WINED3DSIH_DP4,        0,                      0                     },
    {WINED3DSIO_MIN,     1, 3, WINED3DSIH_MIN,        0,                      0                     },
    {WINED3DSIO_MAX,     1, 3, WINED3DSIH_MAX,        0,                      0                     },
    {WINED3DSIO_SLT,     1, 3, WINED3DSIH_SLT,        0,                      0                     },
    {WINED3DSIO_SGE,     1, 3, WINED3DSIH_SGE,        0,                      0                     },
    {WINED3DSIO_ABS,     1, 2, WINED3DSIH_ABS,        0,                      0                     },
    {WINED3DSIO_EXP,     1, 2, WINED3DSIH_EXP,        0,                      0                     },
    {WINED3DSIO_LOG,     1, 2, WINED3DSIH_LOG,        0,                      0                     },
    {WINED3DSIO_EXPP,    1, 2, WINED3DSIH_EXPP,       0,                      0                     },
    {WINED3DSIO_LOGP,    1, 2, WINED3DSIH_LOGP,       0,                      0                     },
    {WINED3DSIO_LIT,     1, 2, WINED3DSIH_LIT,        0,                      0                     },
    {WINED3DSIO_DST,     1, 3, WINED3DSIH_DST,        0,                      0                     },
    {WINED3DSIO_LRP,     1, 4, WINED3DSIH_LRP,        0,                      0                     },
    {WINED3DSIO_FRC,     1, 2, WINED3DSIH_FRC,        0,                      0                     },
    {WINED3DSIO_POW,     1, 3, WINED3DSIH_POW,        0,                      0                     },
    {WINED3DSIO_CRS,     1, 3, WINED3DSIH_CRS,        0,                      0                     },
    {WINED3DSIO_SGN,     1, 2, WINED3DSIH_SGN,        0,                      0                     },
    {WINED3DSIO_NRM,     1, 2, WINED3DSIH_NRM,        0,                      0                     },
    {WINED3DSIO_SINCOS,  1, 4, WINED3DSIH_SINCOS,     WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
    {WINED3DSIO_SINCOS,  1, 2, WINED3DSIH_SINCOS,     WINED3DVS_VERSION(3,0), -1                    },
    /* Matrix */
    {WINED3DSIO_M4x4,    1, 3, WINED3DSIH_M4x4,       0,                      0                     },
    {WINED3DSIO_M4x3,    1, 3, WINED3DSIH_M4x3,       0,                      0                     },
    {WINED3DSIO_M3x4,    1, 3, WINED3DSIH_M3x4,       0,                      0                     },
    {WINED3DSIO_M3x3,    1, 3, WINED3DSIH_M3x3,       0,                      0                     },
    {WINED3DSIO_M3x2,    1, 3, WINED3DSIH_M3x2,       0,                      0                     },
    /* Declare registers */
    {WINED3DSIO_DCL,     0, 2, WINED3DSIH_DCL,        0,                      0                     },
    /* Constant definitions */
    {WINED3DSIO_DEF,     1, 5, WINED3DSIH_DEF,        0,                      0                     },
    {WINED3DSIO_DEFB,    1, 2, WINED3DSIH_DEFB,       0,                      0                     },
    {WINED3DSIO_DEFI,    1, 5, WINED3DSIH_DEFI,       0,                      0                     },
    /* Flow control */
    {WINED3DSIO_REP,     0, 1, WINED3DSIH_REP,        WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_ENDREP,  0, 0, WINED3DSIH_ENDREP,     WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_IF,      0, 1, WINED3DSIH_IF,         WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_IFC,     0, 2, WINED3DSIH_IFC,        WINED3DVS_VERSION(2,1), -1                    },
    {WINED3DSIO_ELSE,    0, 0, WINED3DSIH_ELSE,       WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_ENDIF,   0, 0, WINED3DSIH_ENDIF,      WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_BREAK,   0, 0, WINED3DSIH_BREAK,      WINED3DVS_VERSION(2,1), -1                    },
    {WINED3DSIO_BREAKC,  0, 2, WINED3DSIH_BREAKC,     WINED3DVS_VERSION(2,1), -1                    },
    {WINED3DSIO_BREAKP,  0, 1, WINED3DSIH_BREAKP,     0,                      0                     },
    {WINED3DSIO_CALL,    0, 1, WINED3DSIH_CALL,       WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_CALLNZ,  0, 2, WINED3DSIH_CALLNZ,     WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_LOOP,    0, 2, WINED3DSIH_LOOP,       WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_RET,     0, 0, WINED3DSIH_RET,        WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_ENDLOOP, 0, 0, WINED3DSIH_ENDLOOP,    WINED3DVS_VERSION(2,0), -1                    },
    {WINED3DSIO_LABEL,   0, 1, WINED3DSIH_LABEL,      WINED3DVS_VERSION(2,0), -1                    },

    /* Predicate setup and texture load with explicit LOD */
    {WINED3DSIO_SETP,    1, 3, WINED3DSIH_SETP,       0,                      0                     },
    {WINED3DSIO_TEXLDL,  1, 3, WINED3DSIH_TEXLDL,     WINED3DVS_VERSION(3,0), -1                    },
    /* End-of-table entry */
    {0,                  0, 0, WINED3DSIH_TABLE_SIZE, 0,                      0                     },
};
104
105 static void vshader_set_limits(
106       IWineD3DVertexShaderImpl *This) {
107
108       This->baseShader.limits.texcoord = 0;
109       This->baseShader.limits.attributes = 16;
110       This->baseShader.limits.packed_input = 0;
111
112       switch (This->baseShader.reg_maps.shader_version)
113       {
114           case WINED3DVS_VERSION(1,0):
115           case WINED3DVS_VERSION(1,1):
116                    This->baseShader.limits.temporary = 12;
117                    This->baseShader.limits.constant_bool = 0;
118                    This->baseShader.limits.constant_int = 0;
119                    This->baseShader.limits.address = 1;
120                    This->baseShader.limits.packed_output = 0;
121                    This->baseShader.limits.sampler = 0;
122                    This->baseShader.limits.label = 0;
123                    /* TODO: vs_1_1 has a minimum of 96 constants. What happens if a vs_1_1 shader is used
124                     * on a vs_3_0 capable card that has 256 constants?
125                     */
126                    This->baseShader.limits.constant_float = min(256, GL_LIMITS(vshader_constantsF));
127                    break;
128
129           case WINED3DVS_VERSION(2,0):
130           case WINED3DVS_VERSION(2,1):
131                    This->baseShader.limits.temporary = 12;
132                    This->baseShader.limits.constant_bool = 16;
133                    This->baseShader.limits.constant_int = 16;
134                    This->baseShader.limits.address = 1;
135                    This->baseShader.limits.packed_output = 0;
136                    This->baseShader.limits.sampler = 0;
137                    This->baseShader.limits.label = 16;
138                    This->baseShader.limits.constant_float = min(256, GL_LIMITS(vshader_constantsF));
139                    break;
140
141           case WINED3DVS_VERSION(3,0):
142                    This->baseShader.limits.temporary = 32;
143                    This->baseShader.limits.constant_bool = 32;
144                    This->baseShader.limits.constant_int = 32;
145                    This->baseShader.limits.address = 1;
146                    This->baseShader.limits.packed_output = 12;
147                    This->baseShader.limits.sampler = 4;
148                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
149                    /* DX10 cards on Windows advertise a d3d9 constant limit of 256 even though they are capable
150                     * of supporting much more(GL drivers advertise 1024). d3d9.dll and d3d8.dll clamp the
151                     * wined3d-advertised maximum. Clamp the constant limit for <= 3.0 shaders to 256.s
152                     * use constant buffers
153                     */
154                    This->baseShader.limits.constant_float = min(256, GL_LIMITS(vshader_constantsF));
155                    break;
156
157           default: This->baseShader.limits.temporary = 12;
158                    This->baseShader.limits.constant_bool = 16;
159                    This->baseShader.limits.constant_int = 16;
160                    This->baseShader.limits.address = 1;
161                    This->baseShader.limits.packed_output = 0;
162                    This->baseShader.limits.sampler = 0;
163                    This->baseShader.limits.label = 16;
164                    This->baseShader.limits.constant_float = min(256, GL_LIMITS(vshader_constantsF));
165                    FIXME("Unrecognized vertex shader version %#x\n",
166                            This->baseShader.reg_maps.shader_version);
167       }
168 }
169
170 /* This is an internal function,
171  * used to create fake semantics for shaders
172  * that don't have them - d3d8 shaders where the declaration
173  * stores the register for each input
174  */
175 static void vshader_set_input(
176     IWineD3DVertexShaderImpl* This,
177     unsigned int regnum,
178     BYTE usage, BYTE usage_idx) {
179
180     This->semantics_in[regnum].usage = usage;
181     This->semantics_in[regnum].usage_idx = usage_idx;
182     This->semantics_in[regnum].reg.register_type = WINED3DSPR_INPUT;
183     This->semantics_in[regnum].reg.register_idx = regnum;
184     This->semantics_in[regnum].reg.write_mask = WINED3DSP_WRITEMASK_ALL;
185     This->semantics_in[regnum].reg.modifiers = 0;
186     This->semantics_in[regnum].reg.shift = 0;
187     This->semantics_in[regnum].reg.rel_addr = NULL;
188 }
189
190 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
191     if (usage_idx1 != usage_idx2) return FALSE;
192     if (usage1 == usage2) return TRUE;
193     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
194     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
195
196     return FALSE;
197 }
198
199 BOOL vshader_get_input(
200     IWineD3DVertexShader* iface,
201     BYTE usage_req, BYTE usage_idx_req,
202     unsigned int* regnum) {
203
204     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
205     int i;
206
207     for (i = 0; i < MAX_ATTRIBS; i++) {
208         if (!This->baseShader.reg_maps.attributes[i]) continue;
209
210         if (match_usage(This->semantics_in[i].usage,
211                 This->semantics_in[i].usage_idx, usage_req, usage_idx_req))
212         {
213             *regnum = i;
214             return TRUE;
215         }
216     }
217     return FALSE;
218 }
219
220 /* *******************************************
221    IWineD3DVertexShader IUnknown parts follow
222    ******************************************* */
223 static HRESULT  WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
224     TRACE("iface %p, riid %s, ppobj %p\n", iface, debugstr_guid(riid), ppobj);
225
226     if (IsEqualGUID(riid, &IID_IWineD3DVertexShader)
227             || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
228             || IsEqualGUID(riid, &IID_IWineD3DBase)
229             || IsEqualGUID(riid, &IID_IUnknown))
230     {
231         IUnknown_AddRef(iface);
232         *ppobj = iface;
233         return S_OK;
234     }
235
236     WARN("%s not implemented, returning E_NOINTERFACE\n", debugstr_guid(riid));
237
238     *ppobj = NULL;
239     return E_NOINTERFACE;
240 }
241
242 static ULONG  WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
243     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
244     ULONG refcount = InterlockedIncrement(&This->baseShader.ref);
245
246     TRACE("%p increasing refcount to %u\n", This, refcount);
247
248     return refcount;
249 }
250
251 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
252     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
253     ULONG refcount = InterlockedDecrement(&This->baseShader.ref);
254
255     TRACE("%p decreasing refcount to %u\n", This, refcount);
256
257     if (!refcount)
258     {
259         shader_cleanup((IWineD3DBaseShader *)iface);
260         HeapFree(GetProcessHeap(), 0, This);
261     }
262
263     return refcount;
264 }
265
266 /* *******************************************
267    IWineD3DVertexShader IWineD3DVertexShader parts follow
268    ******************************************* */
269
270 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
271     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
272     
273     *parent = This->parent;
274     IUnknown_AddRef(*parent);
275     TRACE("(%p) : returning %p\n", This, *parent);
276     return WINED3D_OK;
277 }
278
279 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
280     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
281     IWineD3DDevice_AddRef(This->baseShader.device);
282     *pDevice = This->baseShader.device;
283     TRACE("(%p) returning %p\n", This, *pDevice);
284     return WINED3D_OK;
285 }
286
287 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
288     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
289     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
290
291     if (NULL == pData) {
292         *pSizeOfData = This->baseShader.functionLength;
293         return WINED3D_OK;
294     }
295     if (*pSizeOfData < This->baseShader.functionLength) {
296         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
297          * than the required size we should write the required size and
298          * return D3DERR_MOREDATA. That's not actually true. */
299         return WINED3DERR_INVALIDCALL;
300     }
301
302     TRACE("(%p) : GetFunction copying to %p\n", This, pData);
303     memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
304
305     return WINED3D_OK;
306 }
307
/* Store the shader byte code and analyse it.
 *
 * Note that for vertex shaders CompileShader isn't called until the
 * shader is first used. The reason for this is that we need the vertex
 * declaration the shader will be used with in order to determine if
 * the data in a register is of type D3DCOLOR, and needs swizzling.
 *
 * Returns WINED3D_OK on success, the error from shader_get_registers_used()
 * if the register scan fails, or E_OUTOFMEMORY if the byte code copy cannot
 * be allocated. */
static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {

    IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
    IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
    HRESULT hr;
    shader_reg_maps *reg_maps = &This->baseShader.reg_maps;

    TRACE("(%p) : pFunction %p\n", iface, pFunction);

    /* First pass: trace shader */
    if (TRACE_ON(d3d_shader)) shader_trace_init(pFunction, This->baseShader.shader_ins);

    /* Initialize immediate constant lists */
    list_init(&This->baseShader.constantsF);
    list_init(&This->baseShader.constantsB);
    list_init(&This->baseShader.constantsI);

    /* Second pass: figure out registers used, semantics, etc.. */
    /* Start with an empty range (min > max).
     * NOTE(review): presumably shader_get_registers_used() narrows these when
     * it sees relatively addressed constants - confirm in the callee. */
    This->min_rel_offset = GL_LIMITS(vshader_constantsF);
    This->max_rel_offset = 0;
    hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
            This->semantics_in, This->semantics_out, pFunction);
    if (hr != WINED3D_OK) return hr;

    vshader_set_limits(This);

    This->baseShader.shader_mode = deviceImpl->vs_selected_mode;

    /* Pick rel_offset only for the ARB backend, when the GL implementation
     * reports arb_vs_offset_limit and a non-empty relative offset range was
     * recorded. */
    if(deviceImpl->vs_selected_mode == SHADER_ARB &&
       (GLINFO_LOCATION).arb_vs_offset_limit      &&
       This->min_rel_offset <= This->max_rel_offset) {

        if(This->max_rel_offset - This->min_rel_offset > 127) {
            /* Range too wide; rel_offset is left unchanged in this case. */
            FIXME("The difference between the minimum and maximum relative offset is > 127\n");
            FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
            FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
        } else if(This->max_rel_offset - This->min_rel_offset > 63) {
            This->rel_offset = This->min_rel_offset + 63;
        } else if(This->max_rel_offset > 63) {
            This->rel_offset = This->min_rel_offset;
        } else {
            This->rel_offset = 0;
        }
    }
    /* Local float constants only need loading when the shader both uses
     * relative constant addressing and defines local float constants. */
    This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);

    /* copy the function ... because it will certainly be released by application */
    /* NOTE(review): functionLength is not assigned in this function;
     * presumably one of the passes above sets it - confirm. */
    This->baseShader.function = HeapAlloc(GetProcessHeap(), 0, This->baseShader.functionLength);
    if (!This->baseShader.function) return E_OUTOFMEMORY;
    memcpy(This->baseShader.function, pFunction, This->baseShader.functionLength);

    return WINED3D_OK;
}
365
366 /* Preload semantics for d3d8 shaders */
367 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
368     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
369     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
370
371     unsigned int i;
372     for (i = 0; i < vdecl->element_count; ++i)
373     {
374         const struct wined3d_vertex_declaration_element *e = &vdecl->elements[i];
375         vshader_set_input(This, e->output_slot, e->usage, e->usage_idx);
376     }
377 }
378
379 /* Set local constants for d3d8 shaders */
380 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
381         UINT start_idx, const float *src_data, UINT count) {
382     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
383     UINT i, end_idx;
384
385     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
386
387     end_idx = start_idx + count;
388     if (end_idx > GL_LIMITS(vshader_constantsF)) {
389         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
390         end_idx = GL_LIMITS(vshader_constantsF);
391     }
392
393     for (i = start_idx; i < end_idx; ++i) {
394         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
395         if (!lconst) return E_OUTOFMEMORY;
396
397         lconst->idx = i;
398         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
399         list_add_head(&This->baseShader.constantsF, &lconst->entry);
400     }
401
402     return WINED3D_OK;
403 }
404
405 static GLuint vertexshader_compile(IWineD3DVertexShaderImpl *This, const struct vs_compile_args *args) {
406     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
407     SHADER_BUFFER buffer;
408     GLuint ret;
409
410     /* Generate the HW shader */
411     TRACE("(%p) : Generating hardware program\n", This);
412     shader_buffer_init(&buffer);
413     This->cur_args = args;
414     ret = deviceImpl->shader_backend->shader_generate_vshader((IWineD3DVertexShader *)This, &buffer, args);
415     This->cur_args = NULL;
416     shader_buffer_free(&buffer);
417
418     return ret;
419 }
420
/* Method table for vertex shader objects. The initializers are positional.
 * NOTE(review): the entries presumably have to follow the member order of
 * IWineD3DVertexShaderVtbl, which is declared elsewhere - confirm before
 * reordering or inserting entries. */
const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
{
    /*** IUnknown methods ***/
    IWineD3DVertexShaderImpl_QueryInterface,
    IWineD3DVertexShaderImpl_AddRef,
    IWineD3DVertexShaderImpl_Release,
    /*** IWineD3DBase methods ***/
    IWineD3DVertexShaderImpl_GetParent,
    /*** IWineD3DBaseShader methods ***/
    IWineD3DVertexShaderImpl_SetFunction,
    /*** IWineD3DVertexShader methods ***/
    IWineD3DVertexShaderImpl_GetDevice,
    IWineD3DVertexShaderImpl_GetFunction,
    IWineD3DVertexShaderImpl_FakeSemantics,
    IWIneD3DVertexShaderImpl_SetLocalConstantsF
};
437
438 void find_vs_compile_args(IWineD3DVertexShaderImpl *shader, IWineD3DStateBlockImpl *stateblock, struct vs_compile_args *args) {
439     args->fog_src = stateblock->renderState[WINED3DRS_FOGTABLEMODE] == WINED3DFOG_NONE ? VS_FOG_COORD : VS_FOG_Z;
440     args->swizzle_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.swizzle_map;
441 }
442
443 static inline BOOL vs_args_equal(const struct vs_compile_args *stored, const struct vs_compile_args *new,
444                                  const DWORD use_map) {
445     if((stored->swizzle_map & use_map) != new->swizzle_map) return FALSE;
446     return stored->fog_src == new->fog_src;
447 }
448
449 GLuint find_gl_vshader(IWineD3DVertexShaderImpl *shader, const struct vs_compile_args *args)
450 {
451     UINT i;
452     DWORD new_size = shader->shader_array_size;
453     struct vs_compiled_shader *new_array;
454     DWORD use_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.use_map;
455
456     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
457      * so a linear search is more performant than a hashmap or a binary search
458      * (cache coherency etc)
459      */
460     for(i = 0; i < shader->num_gl_shaders; i++) {
461         if(vs_args_equal(&shader->gl_shaders[i].args, args, use_map)) {
462             return shader->gl_shaders[i].prgId;
463         }
464     }
465
466     TRACE("No matching GL shader found, compiling a new shader\n");
467
468     if(shader->shader_array_size == shader->num_gl_shaders) {
469         if (shader->num_gl_shaders)
470         {
471             new_size = shader->shader_array_size + max(1, shader->shader_array_size / 2);
472             new_array = HeapReAlloc(GetProcessHeap(), 0, shader->gl_shaders,
473                                     new_size * sizeof(*shader->gl_shaders));
474         } else {
475             new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader->gl_shaders));
476             new_size = 1;
477         }
478
479         if(!new_array) {
480             ERR("Out of memory\n");
481             return 0;
482         }
483         shader->gl_shaders = new_array;
484         shader->shader_array_size = new_size;
485     }
486
487     shader->gl_shaders[shader->num_gl_shaders].args = *args;
488     shader->gl_shaders[shader->num_gl_shaders].prgId = vertexshader_compile(shader, args);
489     return shader->gl_shaders[shader->num_gl_shaders++].prgId;
490 }