2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2004 Christian Costa
7 * Copyright 2005 Oliver Stieber
8 * Copyright 2006 Ivan Gyurdiev
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "wined3d_private.h"
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
/* Shader debugging: change the condition below to 1 to route VSTRACE and
 * TRACE_VSVECTOR through TRACE. With 0 both macros expand to nothing. */
#if 0 /* Must not be 1 in cvs version */
#define VSTRACE(args) TRACE args
#define TRACE_VSVECTOR(vec) TRACE( #vec "=(%f, %f, %f, %f)\n", vec.x, vec.y, vec.z, vec.w)
#else
#define VSTRACE(args)
#define TRACE_VSVECTOR(vec)
#endif
/*
 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
 *  http://developer.nvidia.com/view.asp?IO=vstovp
 *
 * NVIDIA: Memory Management with VAR
 *  http://developer.nvidia.com/view.asp?IO=var_memory_management
 */
/* TODO: Vertex and pixel shaders are almost identical. The only exceptions are
 * the way some of the data is looked up and the availability of some of the
 * data, i.e. some instructions are only valid for pixel shaders and some only
 * for vertex shaders. Because of this the bulk of the software pipeline can be
 * shared between pixel and vertex shaders, and it would not be surprising if
 * the program could be cross compiled using a large body of shared code. */
57 #define GLNAME_REQUIRE_GLSL ((const char *)1)
59 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
60 /* This table is not order or position dependent. */
63 {WINED3DSIO_NOP, "nop", "NOP", 0, 0, vshader_hw_map2gl, NULL, 0, 0},
64 {WINED3DSIO_MOV, "mov", "MOV", 1, 2, vshader_hw_map2gl, shader_glsl_mov, 0, 0},
65 {WINED3DSIO_MOVA, "mova", NULL, 1, 2, vshader_hw_map2gl, shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
66 {WINED3DSIO_ADD, "add", "ADD", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
67 {WINED3DSIO_SUB, "sub", "SUB", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
68 {WINED3DSIO_MAD, "mad", "MAD", 1, 4, vshader_hw_map2gl, shader_glsl_mad, 0, 0},
69 {WINED3DSIO_MUL, "mul", "MUL", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
70 {WINED3DSIO_RCP, "rcp", "RCP", 1, 2, vshader_hw_rsq_rcp, shader_glsl_rcp, 0, 0},
71 {WINED3DSIO_RSQ, "rsq", "RSQ", 1, 2, vshader_hw_rsq_rcp, shader_glsl_rsq, 0, 0},
72 {WINED3DSIO_DP3, "dp3", "DP3", 1, 3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
73 {WINED3DSIO_DP4, "dp4", "DP4", 1, 3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
74 {WINED3DSIO_MIN, "min", "MIN", 1, 3, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
75 {WINED3DSIO_MAX, "max", "MAX", 1, 3, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
76 {WINED3DSIO_SLT, "slt", "SLT", 1, 3, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
77 {WINED3DSIO_SGE, "sge", "SGE", 1, 3, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
78 {WINED3DSIO_ABS, "abs", "ABS", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
79 {WINED3DSIO_EXP, "exp", "EX2", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
80 {WINED3DSIO_LOG, "log", "LG2", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
81 {WINED3DSIO_EXPP, "expp", "EXP", 1, 2, vshader_hw_map2gl, shader_glsl_expp, 0, 0},
82 {WINED3DSIO_LOGP, "logp", "LOG", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
83 {WINED3DSIO_LIT, "lit", "LIT", 1, 2, vshader_hw_map2gl, shader_glsl_lit, 0, 0},
84 {WINED3DSIO_DST, "dst", "DST", 1, 3, vshader_hw_map2gl, shader_glsl_dst, 0, 0},
85 {WINED3DSIO_LRP, "lrp", "LRP", 1, 4, NULL, shader_glsl_lrp, 0, 0},
86 {WINED3DSIO_FRC, "frc", "FRC", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
87 {WINED3DSIO_POW, "pow", "POW", 1, 3, vshader_hw_map2gl, shader_glsl_pow, 0, 0},
88 {WINED3DSIO_CRS, "crs", "XPD", 1, 3, vshader_hw_map2gl, shader_glsl_cross, 0, 0},
89 /* TODO: sng can possibly be performed a s
92 {WINED3DSIO_SGN, "sgn", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
93 {WINED3DSIO_NRM, "nrm", NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
94 {WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
95 {WINED3DSIO_SINCOS, "sincos", "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
97 {WINED3DSIO_M4x4, "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
98 {WINED3DSIO_M4x3, "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
99 {WINED3DSIO_M3x4, "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
100 {WINED3DSIO_M3x3, "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
101 {WINED3DSIO_M3x2, "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
102 /* Declare registers */
103 {WINED3DSIO_DCL, "dcl", NULL, 0, 2, NULL, NULL, 0, 0},
104 /* Constant definitions */
105 {WINED3DSIO_DEF, "def", NULL, 1, 5, NULL, NULL, 0, 0},
106 {WINED3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
107 {WINED3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
108 /* Flow control - requires GLSL or software shaders */
109 {WINED3DSIO_REP , "rep", NULL, 0, 1, NULL, shader_glsl_rep, WINED3DVS_VERSION(2,0), -1},
110 {WINED3DSIO_ENDREP, "endrep", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
111 {WINED3DSIO_IF, "if", NULL, 0, 1, NULL, shader_glsl_if, WINED3DVS_VERSION(2,0), -1},
112 {WINED3DSIO_IFC, "ifc", NULL, 0, 2, NULL, shader_glsl_ifc, WINED3DVS_VERSION(2,1), -1},
113 {WINED3DSIO_ELSE, "else", NULL, 0, 0, NULL, shader_glsl_else, WINED3DVS_VERSION(2,0), -1},
114 {WINED3DSIO_ENDIF, "endif", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
115 {WINED3DSIO_BREAK, "break", NULL, 0, 0, NULL, shader_glsl_break, WINED3DVS_VERSION(2,1), -1},
116 {WINED3DSIO_BREAKC, "breakc", NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
117 {WINED3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
118 {WINED3DSIO_CALL, "call", NULL, 0, 1, NULL, shader_glsl_call, WINED3DVS_VERSION(2,0), -1},
119 {WINED3DSIO_CALLNZ, "callnz", NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
120 {WINED3DSIO_LOOP, "loop", NULL, 0, 2, NULL, shader_glsl_loop, WINED3DVS_VERSION(2,0), -1},
121 {WINED3DSIO_RET, "ret", NULL, 0, 0, NULL, NULL, WINED3DVS_VERSION(2,0), -1},
122 {WINED3DSIO_ENDLOOP,"endloop", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
123 {WINED3DSIO_LABEL, "label", NULL, 0, 1, NULL, shader_glsl_label, WINED3DVS_VERSION(2,0), -1},
125 {WINED3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
126 {WINED3DSIO_TEXLDL, "texldl", NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
127 {0, NULL, NULL, 0, 0, NULL, NULL, 0, 0}
130 static void vshader_set_limits(
131 IWineD3DVertexShaderImpl *This) {
133 This->baseShader.limits.texcoord = 0;
134 This->baseShader.limits.attributes = 16;
135 This->baseShader.limits.packed_input = 0;
137 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
138 This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
140 switch (This->baseShader.hex_version) {
141 case WINED3DVS_VERSION(1,0):
142 case WINED3DVS_VERSION(1,1):
143 This->baseShader.limits.temporary = 12;
144 This->baseShader.limits.constant_bool = 0;
145 This->baseShader.limits.constant_int = 0;
146 This->baseShader.limits.address = 1;
147 This->baseShader.limits.packed_output = 0;
148 This->baseShader.limits.sampler = 0;
149 This->baseShader.limits.label = 0;
152 case WINED3DVS_VERSION(2,0):
153 case WINED3DVS_VERSION(2,1):
154 This->baseShader.limits.temporary = 12;
155 This->baseShader.limits.constant_bool = 16;
156 This->baseShader.limits.constant_int = 16;
157 This->baseShader.limits.address = 1;
158 This->baseShader.limits.packed_output = 0;
159 This->baseShader.limits.sampler = 0;
160 This->baseShader.limits.label = 16;
163 case WINED3DVS_VERSION(3,0):
164 This->baseShader.limits.temporary = 32;
165 This->baseShader.limits.constant_bool = 32;
166 This->baseShader.limits.constant_int = 32;
167 This->baseShader.limits.address = 1;
168 This->baseShader.limits.packed_output = 12;
169 This->baseShader.limits.sampler = 4;
170 This->baseShader.limits.label = 16; /* FIXME: 2048 */
173 default: This->baseShader.limits.temporary = 12;
174 This->baseShader.limits.constant_bool = 16;
175 This->baseShader.limits.constant_int = 16;
176 This->baseShader.limits.address = 1;
177 This->baseShader.limits.packed_output = 0;
178 This->baseShader.limits.sampler = 0;
179 This->baseShader.limits.label = 16;
180 FIXME("Unrecognized vertex shader version %#x\n",
181 This->baseShader.hex_version);
185 /* This is an internal function,
186 * used to create fake semantics for shaders
187 * that don't have them - d3d8 shaders where the declaration
188 * stores the register for each input
190 static void vshader_set_input(
191 IWineD3DVertexShaderImpl* This,
193 BYTE usage, BYTE usage_idx) {
195 /* Fake usage: set reserved bit, usage, usage_idx */
196 DWORD usage_token = (0x1 << 31) |
197 (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
199 /* Fake register; set reserved bit, regnum, type: input, wmask: all */
200 DWORD reg_token = (0x1 << 31) |
201 WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
203 This->semantics_in[regnum].usage = usage_token;
204 This->semantics_in[regnum].reg = reg_token;
207 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
208 if (usage_idx1 != usage_idx2) return FALSE;
209 if (usage1 == usage2) return TRUE;
210 if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
211 if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
216 BOOL vshader_get_input(
217 IWineD3DVertexShader* iface,
218 BYTE usage_req, BYTE usage_idx_req,
219 unsigned int* regnum) {
221 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
224 for (i = 0; i < MAX_ATTRIBS; i++) {
225 DWORD usage_token = This->semantics_in[i].usage;
226 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
227 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
229 if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
237 BOOL vshader_input_is_color(
238 IWineD3DVertexShader* iface,
239 unsigned int regnum) {
241 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
243 DWORD usage_token = This->semantics_in[regnum].usage;
244 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
245 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
249 for(i = 0; i < This->num_swizzled_attribs; i++) {
250 if(This->swizzled_attribs[i].usage == usage &&
251 This->swizzled_attribs[i].idx == usage_idx) {
258 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
260 UINT numoldswizzles = This->num_swizzled_attribs;
261 IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
263 DWORD usage_token, usage, usage_idx;
266 attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
268 /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
269 memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
271 memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
273 for(i = 0; i < decl->num_swizzled_attribs; i++) {
274 for(j = 0; j < MAX_ATTRIBS; j++) {
276 if(!This->baseShader.reg_maps.attributes[j]) continue;
278 usage_token = This->semantics_in[j].usage;
279 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
280 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
282 if(decl->swizzled_attribs[i].usage == usage &&
283 decl->swizzled_attribs[i].idx == usage_idx) {
284 This->swizzled_attribs[num].usage = usage;
285 This->swizzled_attribs[num].idx = usage_idx;
291 /* Add previously converted attributes back in if they are not defined in the current declaration */
292 for(i = 0; i < numoldswizzles; i++) {
295 for(j = 0; j < decl->declarationWNumElements; j++) {
296 if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
297 oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
302 /* This previously converted attribute is declared in the current declaration. Either it is
303 * already in the new array, or it should not be there. Skip it
307 /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
308 * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
309 * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
310 * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
311 * stays unswizzled as well because it isn't found in the oldswizzles array
313 for(j = 0; j < num; j++) {
314 if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
315 oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
316 oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
317 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
318 sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
322 This->swizzled_attribs[j].usage = oldswizzles[i].usage;
323 This->swizzled_attribs[j].idx = oldswizzles[i].idx;
327 TRACE("New swizzled attributes array\n");
328 for(i = 0; i < num; i++) {
329 TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
330 This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
332 This->num_swizzled_attribs = num;
334 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
335 or GLSL and send it to the card */
336 static VOID IWineD3DVertexShaderImpl_GenerateShader(
337 IWineD3DVertexShader *iface,
338 shader_reg_maps* reg_maps,
339 CONST DWORD *pFunction) {
341 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
342 IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
343 SHADER_BUFFER buffer;
345 find_swizzled_attribs(decl, This);
347 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
348 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
349 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
350 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
351 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
352 This->fixupVertexBufferSize = PGMSIZE;
353 This->fixupVertexBuffer[0] = 0;
355 buffer.buffer = This->device->fixupVertexBuffer;
357 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
361 buffer.newline = TRUE;
363 ((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_vshader(iface, &buffer);
365 #if 1 /* if were using the data buffer of device then we don't need to free it */
366 HeapFree(GetProcessHeap(), 0, buffer.buffer);
370 /* *******************************************
371 IWineD3DVertexShader IUnknown parts follow
372 ******************************************* */
373 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
374 return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
377 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
378 return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
381 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
382 return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
385 /* *******************************************
386 IWineD3DVertexShader IWineD3DVertexShader parts follow
387 ******************************************* */
389 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
390 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
392 *parent = This->parent;
393 IUnknown_AddRef(*parent);
394 TRACE("(%p) : returning %p\n", This, *parent);
398 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
399 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
400 IWineD3DDevice_AddRef(This->baseShader.device);
401 *pDevice = This->baseShader.device;
402 TRACE("(%p) returning %p\n", This, *pDevice);
406 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
407 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
408 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
411 *pSizeOfData = This->baseShader.functionLength;
414 if (*pSizeOfData < This->baseShader.functionLength) {
415 /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
416 * than the required size we should write the required size and
417 * return D3DERR_MOREDATA. That's not actually true. */
418 return WINED3DERR_INVALIDCALL;
420 if (NULL == This->baseShader.function) { /* no function defined */
421 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
422 (*(DWORD **) pData) = NULL;
424 if(This->baseShader.functionLength == 0){
427 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
428 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
433 /* Note that for vertex shaders CompileShader isn't called until the
434 * shader is first used. The reason for this is that we need the vertex
435 * declaration the shader will be used with in order to determine if
436 * the data in a register is of type D3DCOLOR, and needs swizzling. */
437 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
439 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
440 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
442 shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
444 TRACE("(%p) : pFunction %p\n", iface, pFunction);
446 /* First pass: trace shader */
447 shader_trace_init((IWineD3DBaseShader*) This, pFunction);
448 vshader_set_limits(This);
450 /* Initialize immediate constant lists */
451 list_init(&This->baseShader.constantsF);
452 list_init(&This->baseShader.constantsB);
453 list_init(&This->baseShader.constantsI);
455 /* Second pass: figure out registers used, semantics, etc.. */
456 This->min_rel_offset = GL_LIMITS(vshader_constantsF);
457 This->max_rel_offset = 0;
458 memset(reg_maps, 0, sizeof(shader_reg_maps));
459 hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
460 This->semantics_in, This->semantics_out, pFunction, NULL);
461 if (hr != WINED3D_OK) return hr;
463 This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
465 if(deviceImpl->vs_selected_mode == SHADER_ARB &&
466 (GLINFO_LOCATION).arb_vs_offset_limit &&
467 This->min_rel_offset <= This->max_rel_offset) {
469 if(This->max_rel_offset - This->min_rel_offset > 127) {
470 FIXME("The difference between the minimum and maximum relative offset is > 127\n");
471 FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
472 FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
473 } else if(This->max_rel_offset - This->min_rel_offset > 63) {
474 This->rel_offset = This->min_rel_offset + 63;
475 } else if(This->max_rel_offset > 63) {
476 This->rel_offset = This->min_rel_offset;
478 This->rel_offset = 0;
481 This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
483 /* copy the function ... because it will certainly be released by application */
484 if (NULL != pFunction) {
487 function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
488 if (!function) return E_OUTOFMEMORY;
489 memcpy(function, pFunction, This->baseShader.functionLength);
490 This->baseShader.function = function;
492 This->baseShader.function = NULL;
498 /* Preload semantics for d3d8 shaders */
499 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
500 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
501 IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
504 for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
505 WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
506 vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
510 /* Set local constants for d3d8 shaders */
511 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
512 UINT start_idx, const float *src_data, UINT count) {
513 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
516 TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
518 end_idx = start_idx + count;
519 if (end_idx > GL_LIMITS(vshader_constantsF)) {
520 WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
521 end_idx = GL_LIMITS(vshader_constantsF);
524 for (i = start_idx; i < end_idx; ++i) {
525 local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
526 if (!lconst) return E_OUTOFMEMORY;
529 memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
530 list_add_head(&This->baseShader.constantsF, &lconst->entry);
536 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
544 for(i = 0; i < vdecl->declarationWNumElements; i++) {
545 /* Ignore tesselated streams and the termination entry(position0, stream 255, unused) */
546 if(vdecl->pDeclarationWine[i].Stream >= MAX_STREAMS ||
547 vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_UNUSED) continue;
549 for(j = 0; j < MAX_ATTRIBS; j++) {
550 if(!This->baseShader.reg_maps.attributes[j]) continue;
552 usage_token = This->semantics_in[j].usage;
553 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
554 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
556 if(vdecl->pDeclarationWine[i].Usage != usage ||
557 vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
562 for(k = 0; k < This->num_swizzled_attribs; k++) {
563 if(This->swizzled_attribs[k].usage == usage &&
564 This->swizzled_attribs[k].idx == usage_idx) {
568 if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
569 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
570 debug_d3ddeclusage(usage), usage_idx);
573 if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
574 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
575 debug_d3ddeclusage(usage), usage_idx);
583 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
584 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
585 IWineD3DVertexDeclarationImpl *vdecl;
586 CONST DWORD *function = This->baseShader.function;
587 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
589 TRACE("(%p) : function %p\n", iface, function);
591 /* We're already compiled. */
592 if (This->baseShader.is_compiled) {
593 vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
595 if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
596 memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
598 /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
599 * we have to recompile, but we have to take a deeper look at see if the attribs that differ
600 * are declared in the decl and used in the shader
602 if(swizzled_attribs_differ(This, vdecl)) {
603 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
606 WARN("Swizzled attribute validation required an expensive comparison\n");
612 if(This->recompile_count < 50) {
613 This->recompile_count++;
615 FIXME("Vertexshader %p recompiled more than 50 times\n", This);
618 deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
621 /* We don't need to compile */
623 This->baseShader.is_compiled = TRUE;
627 /* Generate the HW shader */
628 TRACE("(%p) : Generating hardware program\n", This);
629 IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
631 This->baseShader.is_compiled = TRUE;
636 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
638 /*** IUnknown methods ***/
639 IWineD3DVertexShaderImpl_QueryInterface,
640 IWineD3DVertexShaderImpl_AddRef,
641 IWineD3DVertexShaderImpl_Release,
642 /*** IWineD3DBase methods ***/
643 IWineD3DVertexShaderImpl_GetParent,
644 /*** IWineD3DBaseShader methods ***/
645 IWineD3DVertexShaderImpl_SetFunction,
646 IWineD3DVertexShaderImpl_CompileShader,
647 /*** IWineD3DVertexShader methods ***/
648 IWineD3DVertexShaderImpl_GetDevice,
649 IWineD3DVertexShaderImpl_GetFunction,
650 IWineD3DVertexShaderImpl_FakeSemantics,
651 IWIneD3DVertexShaderImpl_SetLocalConstantsF