/*
 * shaders implementation
 *
 * Copyright 2002-2003 Jason Edmeades
 * Copyright 2002-2003 Raphael Junqueira
 * Copyright 2004 Christian Costa
 * Copyright 2005 Oliver Stieber
 * Copyright 2006 Ivan Gyurdiev
 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
31 #include "wined3d_private.h"
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
35 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
37 /* TODO: Vertex and Pixel shaders are almost identical, the only exception being the way that some of the data is looked up or the availability of some of the data i.e. some instructions are only valid for pshaders and some for vshaders
38 because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the program can be cross compiled using a large body of shared code */
40 #define GLNAME_REQUIRE_GLSL ((const char *)1)
42 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
43 /* This table is not order or position dependent. */
46 {WINED3DSIO_NOP, "nop", "NOP", 0, 0, WINED3DSIH_NOP, 0, 0 },
47 {WINED3DSIO_MOV, "mov", "MOV", 1, 2, WINED3DSIH_MOV, 0, 0 },
48 {WINED3DSIO_MOVA, "mova", NULL, 1, 2, WINED3DSIH_MOVA, WINED3DVS_VERSION(2,0), -1 },
49 {WINED3DSIO_ADD, "add", "ADD", 1, 3, WINED3DSIH_ADD, 0, 0 },
50 {WINED3DSIO_SUB, "sub", "SUB", 1, 3, WINED3DSIH_SUB, 0, 0 },
51 {WINED3DSIO_MAD, "mad", "MAD", 1, 4, WINED3DSIH_MAD, 0, 0 },
52 {WINED3DSIO_MUL, "mul", "MUL", 1, 3, WINED3DSIH_MUL, 0, 0 },
53 {WINED3DSIO_RCP, "rcp", "RCP", 1, 2, WINED3DSIH_RCP, 0, 0 },
54 {WINED3DSIO_RSQ, "rsq", "RSQ", 1, 2, WINED3DSIH_RSQ, 0, 0 },
55 {WINED3DSIO_DP3, "dp3", "DP3", 1, 3, WINED3DSIH_DP3, 0, 0 },
56 {WINED3DSIO_DP4, "dp4", "DP4", 1, 3, WINED3DSIH_DP4, 0, 0 },
57 {WINED3DSIO_MIN, "min", "MIN", 1, 3, WINED3DSIH_MIN, 0, 0 },
58 {WINED3DSIO_MAX, "max", "MAX", 1, 3, WINED3DSIH_MAX, 0, 0 },
59 {WINED3DSIO_SLT, "slt", "SLT", 1, 3, WINED3DSIH_SLT, 0, 0 },
60 {WINED3DSIO_SGE, "sge", "SGE", 1, 3, WINED3DSIH_SGE, 0, 0 },
61 {WINED3DSIO_ABS, "abs", "ABS", 1, 2, WINED3DSIH_ABS, 0, 0 },
62 {WINED3DSIO_EXP, "exp", "EX2", 1, 2, WINED3DSIH_EXP, 0, 0 },
63 {WINED3DSIO_LOG, "log", "LG2", 1, 2, WINED3DSIH_LOG, 0, 0 },
64 {WINED3DSIO_EXPP, "expp", "EXP", 1, 2, WINED3DSIH_EXPP, 0, 0 },
65 {WINED3DSIO_LOGP, "logp", "LOG", 1, 2, WINED3DSIH_LOGP, 0, 0 },
66 {WINED3DSIO_LIT, "lit", "LIT", 1, 2, WINED3DSIH_LIT, 0, 0 },
67 {WINED3DSIO_DST, "dst", "DST", 1, 3, WINED3DSIH_DST, 0, 0 },
68 {WINED3DSIO_LRP, "lrp", "LRP", 1, 4, WINED3DSIH_LRP, 0, 0 },
69 {WINED3DSIO_FRC, "frc", "FRC", 1, 2, WINED3DSIH_FRC, 0, 0 },
70 {WINED3DSIO_POW, "pow", "POW", 1, 3, WINED3DSIH_POW, 0, 0 },
71 {WINED3DSIO_CRS, "crs", "XPD", 1, 3, WINED3DSIH_CRS, 0, 0 },
72 /* TODO: sng can possibly be performed a s
75 {WINED3DSIO_SGN, "sgn", NULL, 1, 2, WINED3DSIH_SGN, 0, 0 },
76 {WINED3DSIO_NRM, "nrm", NULL, 1, 2, WINED3DSIH_NRM, 0, 0 },
77 {WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, WINED3DSIH_SINCOS, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
78 {WINED3DSIO_SINCOS, "sincos", "SCS", 1, 2, WINED3DSIH_SINCOS, WINED3DVS_VERSION(3,0), -1 },
80 {WINED3DSIO_M4x4, "m4x4", "undefined", 1, 3, WINED3DSIH_M4x4, 0, 0 },
81 {WINED3DSIO_M4x3, "m4x3", "undefined", 1, 3, WINED3DSIH_M4x3, 0, 0 },
82 {WINED3DSIO_M3x4, "m3x4", "undefined", 1, 3, WINED3DSIH_M3x4, 0, 0 },
83 {WINED3DSIO_M3x3, "m3x3", "undefined", 1, 3, WINED3DSIH_M3x3, 0, 0 },
84 {WINED3DSIO_M3x2, "m3x2", "undefined", 1, 3, WINED3DSIH_M3x2, 0, 0 },
85 /* Declare registers */
86 {WINED3DSIO_DCL, "dcl", NULL, 0, 2, WINED3DSIH_DCL, 0, 0 },
87 /* Constant definitions */
88 {WINED3DSIO_DEF, "def", NULL, 1, 5, WINED3DSIH_DEF, 0, 0 },
89 {WINED3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 1, 2, WINED3DSIH_DEFB, 0, 0 },
90 {WINED3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 1, 5, WINED3DSIH_DEFI, 0, 0 },
91 /* Flow control - requires GLSL or software shaders */
92 {WINED3DSIO_REP , "rep", NULL, 0, 1, WINED3DSIH_REP, WINED3DVS_VERSION(2,0), -1 },
93 {WINED3DSIO_ENDREP, "endrep", NULL, 0, 0, WINED3DSIH_ENDREP, WINED3DVS_VERSION(2,0), -1 },
94 {WINED3DSIO_IF, "if", NULL, 0, 1, WINED3DSIH_IF, WINED3DVS_VERSION(2,0), -1 },
95 {WINED3DSIO_IFC, "ifc", NULL, 0, 2, WINED3DSIH_IFC, WINED3DVS_VERSION(2,1), -1 },
96 {WINED3DSIO_ELSE, "else", NULL, 0, 0, WINED3DSIH_ELSE, WINED3DVS_VERSION(2,0), -1 },
97 {WINED3DSIO_ENDIF, "endif", NULL, 0, 0, WINED3DSIH_ENDIF, WINED3DVS_VERSION(2,0), -1 },
98 {WINED3DSIO_BREAK, "break", NULL, 0, 0, WINED3DSIH_BREAK, WINED3DVS_VERSION(2,1), -1 },
99 {WINED3DSIO_BREAKC, "breakc", NULL, 0, 2, WINED3DSIH_BREAKC, WINED3DVS_VERSION(2,1), -1 },
100 {WINED3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 0, 1, WINED3DSIH_BREAKP, 0, 0 },
101 {WINED3DSIO_CALL, "call", NULL, 0, 1, WINED3DSIH_CALL, WINED3DVS_VERSION(2,0), -1 },
102 {WINED3DSIO_CALLNZ, "callnz", NULL, 0, 2, WINED3DSIH_CALLNZ, WINED3DVS_VERSION(2,0), -1 },
103 {WINED3DSIO_LOOP, "loop", NULL, 0, 2, WINED3DSIH_LOOP, WINED3DVS_VERSION(2,0), -1 },
104 {WINED3DSIO_RET, "ret", NULL, 0, 0, WINED3DSIH_RET, WINED3DVS_VERSION(2,0), -1 },
105 {WINED3DSIO_ENDLOOP, "endloop", NULL, 0, 0, WINED3DSIH_ENDLOOP, WINED3DVS_VERSION(2,0), -1 },
106 {WINED3DSIO_LABEL, "label", NULL, 0, 1, WINED3DSIH_LABEL, WINED3DVS_VERSION(2,0), -1 },
108 {WINED3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 1, 3, WINED3DSIH_SETP, 0, 0 },
109 {WINED3DSIO_TEXLDL, "texldl", NULL, 1, 3, WINED3DSIH_TEXLDL, WINED3DVS_VERSION(3,0), -1 },
110 {0, NULL, NULL, 0, 0, 0, 0, 0 }
113 static void vshader_set_limits(
114 IWineD3DVertexShaderImpl *This) {
116 This->baseShader.limits.texcoord = 0;
117 This->baseShader.limits.attributes = 16;
118 This->baseShader.limits.packed_input = 0;
120 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
121 This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
123 switch (This->baseShader.reg_maps.shader_version)
125 case WINED3DVS_VERSION(1,0):
126 case WINED3DVS_VERSION(1,1):
127 This->baseShader.limits.temporary = 12;
128 This->baseShader.limits.constant_bool = 0;
129 This->baseShader.limits.constant_int = 0;
130 This->baseShader.limits.address = 1;
131 This->baseShader.limits.packed_output = 0;
132 This->baseShader.limits.sampler = 0;
133 This->baseShader.limits.label = 0;
136 case WINED3DVS_VERSION(2,0):
137 case WINED3DVS_VERSION(2,1):
138 This->baseShader.limits.temporary = 12;
139 This->baseShader.limits.constant_bool = 16;
140 This->baseShader.limits.constant_int = 16;
141 This->baseShader.limits.address = 1;
142 This->baseShader.limits.packed_output = 0;
143 This->baseShader.limits.sampler = 0;
144 This->baseShader.limits.label = 16;
147 case WINED3DVS_VERSION(3,0):
148 This->baseShader.limits.temporary = 32;
149 This->baseShader.limits.constant_bool = 32;
150 This->baseShader.limits.constant_int = 32;
151 This->baseShader.limits.address = 1;
152 This->baseShader.limits.packed_output = 12;
153 This->baseShader.limits.sampler = 4;
154 This->baseShader.limits.label = 16; /* FIXME: 2048 */
157 default: This->baseShader.limits.temporary = 12;
158 This->baseShader.limits.constant_bool = 16;
159 This->baseShader.limits.constant_int = 16;
160 This->baseShader.limits.address = 1;
161 This->baseShader.limits.packed_output = 0;
162 This->baseShader.limits.sampler = 0;
163 This->baseShader.limits.label = 16;
164 FIXME("Unrecognized vertex shader version %#x\n",
165 This->baseShader.reg_maps.shader_version);
169 /* This is an internal function,
170 * used to create fake semantics for shaders
171 * that don't have them - d3d8 shaders where the declaration
172 * stores the register for each input
174 static void vshader_set_input(
175 IWineD3DVertexShaderImpl* This,
177 BYTE usage, BYTE usage_idx) {
179 /* Fake usage: set reserved bit, usage, usage_idx */
180 DWORD usage_token = (0x1 << 31) |
181 (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
183 /* Fake register; set reserved bit, regnum, type: input, wmask: all */
184 DWORD reg_token = (0x1 << 31) |
185 WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
187 This->semantics_in[regnum].usage = usage_token;
188 This->semantics_in[regnum].reg = reg_token;
191 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
192 if (usage_idx1 != usage_idx2) return FALSE;
193 if (usage1 == usage2) return TRUE;
194 if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
195 if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
200 BOOL vshader_get_input(
201 IWineD3DVertexShader* iface,
202 BYTE usage_req, BYTE usage_idx_req,
203 unsigned int* regnum) {
205 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
208 for (i = 0; i < MAX_ATTRIBS; i++) {
209 DWORD usage_token = This->semantics_in[i].usage;
210 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
211 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
213 if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
221 BOOL vshader_input_is_color(
222 IWineD3DVertexShader* iface,
223 unsigned int regnum) {
225 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
227 DWORD usage_token = This->semantics_in[regnum].usage;
228 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
229 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
233 for(i = 0; i < This->num_swizzled_attribs; i++) {
234 if(This->swizzled_attribs[i].usage == usage &&
235 This->swizzled_attribs[i].idx == usage_idx) {
242 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
244 UINT numoldswizzles = This->num_swizzled_attribs;
245 IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
247 DWORD usage_token, usage, usage_idx;
250 attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
252 /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
253 memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
255 memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
257 for(i = 0; i < decl->num_swizzled_attribs; i++) {
258 for(j = 0; j < MAX_ATTRIBS; j++) {
260 if(!This->baseShader.reg_maps.attributes[j]) continue;
262 usage_token = This->semantics_in[j].usage;
263 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
264 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
266 if(decl->swizzled_attribs[i].usage == usage &&
267 decl->swizzled_attribs[i].idx == usage_idx) {
268 This->swizzled_attribs[num].usage = usage;
269 This->swizzled_attribs[num].idx = usage_idx;
275 /* Add previously converted attributes back in if they are not defined in the current declaration */
276 for(i = 0; i < numoldswizzles; i++) {
279 for(j = 0; j < decl->declarationWNumElements; j++) {
280 if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
281 oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
286 /* This previously converted attribute is declared in the current declaration. Either it is
287 * already in the new array, or it should not be there. Skip it
291 /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
292 * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
293 * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
294 * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
295 * stays unswizzled as well because it isn't found in the oldswizzles array
297 for(j = 0; j < num; j++) {
298 if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
299 oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
300 oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
301 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
302 sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
306 This->swizzled_attribs[j].usage = oldswizzles[i].usage;
307 This->swizzled_attribs[j].idx = oldswizzles[i].idx;
311 TRACE("New swizzled attributes array\n");
312 for(i = 0; i < num; i++) {
313 TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
314 This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
316 This->num_swizzled_attribs = num;
318 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
319 or GLSL and send it to the card */
320 static void IWineD3DVertexShaderImpl_GenerateShader(IWineD3DVertexShader *iface,
321 const struct shader_reg_maps* reg_maps, const DWORD *pFunction)
323 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
324 IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
325 SHADER_BUFFER buffer;
327 find_swizzled_attribs(decl, This);
329 shader_buffer_init(&buffer);
330 ((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_vshader(iface, &buffer);
331 shader_buffer_free(&buffer);
334 /* *******************************************
335 IWineD3DVertexShader IUnknown parts follow
336 ******************************************* */
337 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
338 TRACE("iface %p, riid %s, ppobj %p\n", iface, debugstr_guid(riid), ppobj);
340 if (IsEqualGUID(riid, &IID_IWineD3DVertexShader)
341 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
342 || IsEqualGUID(riid, &IID_IWineD3DBase)
343 || IsEqualGUID(riid, &IID_IUnknown))
345 IUnknown_AddRef(iface);
350 WARN("%s not implemented, returning E_NOINTERFACE\n", debugstr_guid(riid));
353 return E_NOINTERFACE;
356 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
357 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
358 ULONG refcount = InterlockedIncrement(&This->baseShader.ref);
360 TRACE("%p increasing refcount to %u\n", This, refcount);
365 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
366 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
367 ULONG refcount = InterlockedDecrement(&This->baseShader.ref);
369 TRACE("%p decreasing refcount to %u\n", This, refcount);
373 shader_cleanup((IWineD3DBaseShader *)iface);
374 HeapFree(GetProcessHeap(), 0, This);
380 /* *******************************************
381 IWineD3DVertexShader IWineD3DVertexShader parts follow
382 ******************************************* */
384 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
385 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
387 *parent = This->parent;
388 IUnknown_AddRef(*parent);
389 TRACE("(%p) : returning %p\n", This, *parent);
393 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
394 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
395 IWineD3DDevice_AddRef(This->baseShader.device);
396 *pDevice = This->baseShader.device;
397 TRACE("(%p) returning %p\n", This, *pDevice);
401 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
402 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
403 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
406 *pSizeOfData = This->baseShader.functionLength;
409 if (*pSizeOfData < This->baseShader.functionLength) {
410 /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
411 * than the required size we should write the required size and
412 * return D3DERR_MOREDATA. That's not actually true. */
413 return WINED3DERR_INVALIDCALL;
415 if (NULL == This->baseShader.function) { /* no function defined */
416 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
417 (*(DWORD **) pData) = NULL;
419 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
420 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
425 /* Note that for vertex shaders CompileShader isn't called until the
426 * shader is first used. The reason for this is that we need the vertex
427 * declaration the shader will be used with in order to determine if
428 * the data in a register is of type D3DCOLOR, and needs swizzling. */
429 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
431 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
432 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
434 shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
436 TRACE("(%p) : pFunction %p\n", iface, pFunction);
438 /* First pass: trace shader */
439 if (TRACE_ON(d3d_shader)) shader_trace_init(pFunction, This->baseShader.shader_ins);
441 /* Initialize immediate constant lists */
442 list_init(&This->baseShader.constantsF);
443 list_init(&This->baseShader.constantsB);
444 list_init(&This->baseShader.constantsI);
446 /* Second pass: figure out registers used, semantics, etc.. */
447 This->min_rel_offset = GL_LIMITS(vshader_constantsF);
448 This->max_rel_offset = 0;
449 memset(reg_maps, 0, sizeof(shader_reg_maps));
450 hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
451 This->semantics_in, This->semantics_out, pFunction);
452 if (hr != WINED3D_OK) return hr;
454 vshader_set_limits(This);
456 This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
458 if(deviceImpl->vs_selected_mode == SHADER_ARB &&
459 (GLINFO_LOCATION).arb_vs_offset_limit &&
460 This->min_rel_offset <= This->max_rel_offset) {
462 if(This->max_rel_offset - This->min_rel_offset > 127) {
463 FIXME("The difference between the minimum and maximum relative offset is > 127\n");
464 FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
465 FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
466 } else if(This->max_rel_offset - This->min_rel_offset > 63) {
467 This->rel_offset = This->min_rel_offset + 63;
468 } else if(This->max_rel_offset > 63) {
469 This->rel_offset = This->min_rel_offset;
471 This->rel_offset = 0;
474 This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
476 /* copy the function ... because it will certainly be released by application */
477 if (NULL != pFunction) {
480 function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
481 if (!function) return E_OUTOFMEMORY;
482 memcpy(function, pFunction, This->baseShader.functionLength);
483 This->baseShader.function = function;
485 This->baseShader.function = NULL;
491 /* Preload semantics for d3d8 shaders */
492 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
493 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
494 IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
497 for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
498 const WINED3DVERTEXELEMENT *element = vdecl->pDeclarationWine + i;
499 vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
503 /* Set local constants for d3d8 shaders */
504 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
505 UINT start_idx, const float *src_data, UINT count) {
506 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
509 TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
511 end_idx = start_idx + count;
512 if (end_idx > GL_LIMITS(vshader_constantsF)) {
513 WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
514 end_idx = GL_LIMITS(vshader_constantsF);
517 for (i = start_idx; i < end_idx; ++i) {
518 local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
519 if (!lconst) return E_OUTOFMEMORY;
522 memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
523 list_add_head(&This->baseShader.constantsF, &lconst->entry);
529 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
537 for(i = 0; i < vdecl->declarationWNumElements; i++) {
538 /* Ignore tesselated streams and the termination entry(position0, stream 255, unused) */
539 if(vdecl->pDeclarationWine[i].Stream >= MAX_STREAMS ||
540 vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_UNUSED) continue;
542 for(j = 0; j < MAX_ATTRIBS; j++) {
543 if(!This->baseShader.reg_maps.attributes[j]) continue;
545 usage_token = This->semantics_in[j].usage;
546 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
547 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
549 if(vdecl->pDeclarationWine[i].Usage != usage ||
550 vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
555 for(k = 0; k < This->num_swizzled_attribs; k++) {
556 if(This->swizzled_attribs[k].usage == usage &&
557 This->swizzled_attribs[k].idx == usage_idx) {
561 if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
562 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
563 debug_d3ddeclusage(usage), usage_idx);
566 if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
567 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
568 debug_d3ddeclusage(usage), usage_idx);
576 HRESULT IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
577 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
578 IWineD3DVertexDeclarationImpl *vdecl;
579 CONST DWORD *function = This->baseShader.function;
580 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
582 TRACE("(%p) : function %p\n", iface, function);
584 /* We're already compiled. */
585 if (This->baseShader.is_compiled) {
586 vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
588 if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
589 memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
591 /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
592 * we have to recompile, but we have to take a deeper look at see if the attribs that differ
593 * are declared in the decl and used in the shader
595 if(swizzled_attribs_differ(This, vdecl)) {
596 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
599 WARN("Swizzled attribute validation required an expensive comparison\n");
605 if(This->recompile_count < 50) {
606 This->recompile_count++;
608 FIXME("Vertexshader %p recompiled more than 50 times\n", This);
611 deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
614 /* We don't need to compile */
616 This->baseShader.is_compiled = TRUE;
620 /* Generate the HW shader */
621 TRACE("(%p) : Generating hardware program\n", This);
622 IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
624 This->baseShader.is_compiled = TRUE;
629 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
631 /*** IUnknown methods ***/
632 IWineD3DVertexShaderImpl_QueryInterface,
633 IWineD3DVertexShaderImpl_AddRef,
634 IWineD3DVertexShaderImpl_Release,
635 /*** IWineD3DBase methods ***/
636 IWineD3DVertexShaderImpl_GetParent,
637 /*** IWineD3DBaseShader methods ***/
638 IWineD3DVertexShaderImpl_SetFunction,
639 /*** IWineD3DVertexShader methods ***/
640 IWineD3DVertexShaderImpl_GetDevice,
641 IWineD3DVertexShaderImpl_GetFunction,
642 IWineD3DVertexShaderImpl_FakeSemantics,
643 IWIneD3DVertexShaderImpl_SetLocalConstantsF