2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2004 Christian Costa
7 * Copyright 2005 Oliver Stieber
8 * Copyright 2006 Ivan Gyurdiev
9 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
31 #include "wined3d_private.h"
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
/* Shorthand for the gl_info of the adapter behind the shader's device.
 * The expansion refers to a variable named This, so it can only be used
 * where a shader implementation pointer called This is in scope. */
35 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
37 /* Shader debugging - Change the following line to enable debugging of software
39 #if 0 /* Must not be 1 in cvs version */
40 # define VSTRACE(A) TRACE A
41 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
44 # define TRACE_VSVECTOR(name)
48 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
49 * http://developer.nvidia.com/view.asp?IO=vstovp
51 * NVIDIA: Memory Management with VAR
52 * http://developer.nvidia.com/view.asp?IO=var_memory_management
55 /* TODO: Vertex and Pixel shaders are almost identical, the only exception being the way that some of the data is looked up or the availability of some of the data i.e. some instructions are only valid for pshaders and some for vshaders
56 because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the program can be cross compiled using a large body of shared code */
/* Marker stored in the third column of the opcode table (used by the defb,
 * defi, breakp and setp rows) in place of a mnemonic string. It is the
 * integer 1 cast to a pointer, so it may only be compared against, never
 * dereferenced. */
58 #define GLNAME_REQUIRE_GLSL ((const char *)1)
/* Opcode table for vertex shaders.
 *
 * Every row starts with the WINED3DSIO_* opcode and its lower-case D3D
 * assembly mnemonic. The third column is an upper-case mnemonic string,
 * NULL, or GLNAME_REQUIRE_GLSL. The two function-pointer columns hold a
 * vshader_hw_* / shader_hw_* handler and a shader_glsl_* handler (NULL where
 * the row has none). The last two columns are 0, -1 or WINED3DVS_VERSION()
 * values.
 *
 * NOTE(review): the field meanings come from SHADER_OPCODE, which is declared
 * elsewhere - presumably "has destination", parameter count, ARB handler,
 * GLSL handler, minimum version and maximum version; confirm against the
 * header. The final all-zero row presumably acts as the end-of-table marker. */
60 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
61 /* This table is not order or position dependent. */
64 {WINED3DSIO_NOP, "nop", "NOP", 0, 0, vshader_hw_map2gl, NULL, 0, 0},
65 {WINED3DSIO_MOV, "mov", "MOV", 1, 2, vshader_hw_map2gl, shader_glsl_mov, 0, 0},
66 {WINED3DSIO_MOVA, "mova", NULL, 1, 2, vshader_hw_map2gl, shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
67 {WINED3DSIO_ADD, "add", "ADD", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
68 {WINED3DSIO_SUB, "sub", "SUB", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
69 {WINED3DSIO_MAD, "mad", "MAD", 1, 4, vshader_hw_map2gl, shader_glsl_mad, 0, 0},
70 {WINED3DSIO_MUL, "mul", "MUL", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
71 {WINED3DSIO_RCP, "rcp", "RCP", 1, 2, vshader_hw_rsq_rcp, shader_glsl_rcp, 0, 0},
72 {WINED3DSIO_RSQ, "rsq", "RSQ", 1, 2, vshader_hw_rsq_rcp, shader_glsl_rsq, 0, 0},
73 {WINED3DSIO_DP3, "dp3", "DP3", 1, 3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
74 {WINED3DSIO_DP4, "dp4", "DP4", 1, 3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
75 {WINED3DSIO_MIN, "min", "MIN", 1, 3, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
76 {WINED3DSIO_MAX, "max", "MAX", 1, 3, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
77 {WINED3DSIO_SLT, "slt", "SLT", 1, 3, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
78 {WINED3DSIO_SGE, "sge", "SGE", 1, 3, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
79 {WINED3DSIO_ABS, "abs", "ABS", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
80 {WINED3DSIO_EXP, "exp", "EX2", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
81 {WINED3DSIO_LOG, "log", "LG2", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
82 {WINED3DSIO_EXPP, "expp", "EXP", 1, 2, vshader_hw_map2gl, shader_glsl_expp, 0, 0},
83 {WINED3DSIO_LOGP, "logp", "LOG", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
84 {WINED3DSIO_LIT, "lit", "LIT", 1, 2, vshader_hw_map2gl, shader_glsl_lit, 0, 0},
85 {WINED3DSIO_DST, "dst", "DST", 1, 3, vshader_hw_map2gl, shader_glsl_dst, 0, 0},
86 {WINED3DSIO_LRP, "lrp", "LRP", 1, 4, NULL, shader_glsl_lrp, 0, 0},
87 {WINED3DSIO_FRC, "frc", "FRC", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
88 {WINED3DSIO_POW, "pow", "POW", 1, 3, vshader_hw_map2gl, shader_glsl_pow, 0, 0},
89 {WINED3DSIO_CRS, "crs", "XPD", 1, 3, vshader_hw_map2gl, shader_glsl_cross, 0, 0},
90 /* TODO: sng can possibly be performed a s
93 {WINED3DSIO_SGN, "sgn", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
94 {WINED3DSIO_NRM, "nrm", NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
95 {WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
96 {WINED3DSIO_SINCOS, "sincos", "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
98 {WINED3DSIO_M4x4, "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
99 {WINED3DSIO_M4x3, "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
100 {WINED3DSIO_M3x4, "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
101 {WINED3DSIO_M3x3, "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
102 {WINED3DSIO_M3x2, "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
103 /* Declare registers */
104 {WINED3DSIO_DCL, "dcl", NULL, 0, 2, NULL, NULL, 0, 0},
105 /* Constant definitions */
106 {WINED3DSIO_DEF, "def", NULL, 1, 5, NULL, NULL, 0, 0},
107 {WINED3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
108 {WINED3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
109 /* Flow control - requires GLSL or software shaders */
110 {WINED3DSIO_REP , "rep", NULL, 0, 1, NULL, shader_glsl_rep, WINED3DVS_VERSION(2,0), -1},
111 {WINED3DSIO_ENDREP, "endrep", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
112 {WINED3DSIO_IF, "if", NULL, 0, 1, NULL, shader_glsl_if, WINED3DVS_VERSION(2,0), -1},
113 {WINED3DSIO_IFC, "ifc", NULL, 0, 2, NULL, shader_glsl_ifc, WINED3DVS_VERSION(2,1), -1},
114 {WINED3DSIO_ELSE, "else", NULL, 0, 0, NULL, shader_glsl_else, WINED3DVS_VERSION(2,0), -1},
115 {WINED3DSIO_ENDIF, "endif", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
116 {WINED3DSIO_BREAK, "break", NULL, 0, 0, NULL, shader_glsl_break, WINED3DVS_VERSION(2,1), -1},
117 {WINED3DSIO_BREAKC, "breakc", NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
118 {WINED3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
119 {WINED3DSIO_CALL, "call", NULL, 0, 1, NULL, shader_glsl_call, WINED3DVS_VERSION(2,0), -1},
120 {WINED3DSIO_CALLNZ, "callnz", NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
121 {WINED3DSIO_LOOP, "loop", NULL, 0, 2, NULL, shader_glsl_loop, WINED3DVS_VERSION(2,0), -1},
122 {WINED3DSIO_RET, "ret", NULL, 0, 0, NULL, NULL, WINED3DVS_VERSION(2,0), -1},
123 {WINED3DSIO_ENDLOOP,"endloop", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
124 {WINED3DSIO_LABEL, "label", NULL, 0, 1, NULL, shader_glsl_label, WINED3DVS_VERSION(2,0), -1},
126 {WINED3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
127 {WINED3DSIO_TEXLDL, "texldl", NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
128 {0, NULL, NULL, 0, 0, NULL, NULL, 0, 0}
/* Fill in This->baseShader.limits for the shader's version.
 *
 * texcoord, attributes and packed_input get the same value for every
 * version, and constant_float is taken from the GL implementation limit.
 * The remaining limits (temporary, constant_bool, constant_int, address,
 * packed_output, sampler, label) are selected by
 * This->baseShader.hex_version: vs_1_0/vs_1_1, vs_2_0/vs_2_1 and vs_3_0 each
 * have their own set. Any other version is given the same values as the
 * vs_2_x case and a FIXME is logged. */
131 static void vshader_set_limits(
132 IWineD3DVertexShaderImpl *This) {
/* Version independent limits. */
134 This->baseShader.limits.texcoord = 0;
135 This->baseShader.limits.attributes = 16;
136 This->baseShader.limits.packed_input = 0;
138 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
139 This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
/* Version dependent limits. */
141 switch (This->baseShader.hex_version) {
142 case WINED3DVS_VERSION(1,0):
143 case WINED3DVS_VERSION(1,1):
/* vs_1_x: no bool/int constants, no samplers, no labels. */
144 This->baseShader.limits.temporary = 12;
145 This->baseShader.limits.constant_bool = 0;
146 This->baseShader.limits.constant_int = 0;
147 This->baseShader.limits.address = 1;
148 This->baseShader.limits.packed_output = 0;
149 This->baseShader.limits.sampler = 0;
150 This->baseShader.limits.label = 0;
153 case WINED3DVS_VERSION(2,0):
154 case WINED3DVS_VERSION(2,1):
/* vs_2_x: 16 bool and int constants, 16 labels. */
155 This->baseShader.limits.temporary = 12;
156 This->baseShader.limits.constant_bool = 16;
157 This->baseShader.limits.constant_int = 16;
158 This->baseShader.limits.address = 1;
159 This->baseShader.limits.packed_output = 0;
160 This->baseShader.limits.sampler = 0;
161 This->baseShader.limits.label = 16;
164 case WINED3DVS_VERSION(3,0):
/* vs_3_0: the only version here with packed outputs and samplers. */
165 This->baseShader.limits.temporary = 32;
166 This->baseShader.limits.constant_bool = 32;
167 This->baseShader.limits.constant_int = 32;
168 This->baseShader.limits.address = 1;
169 This->baseShader.limits.packed_output = 12;
170 This->baseShader.limits.sampler = 4;
171 This->baseShader.limits.label = 16; /* FIXME: 2048 */
/* Unknown version: same numbers as vs_2_x, plus a FIXME in the log. */
174 default: This->baseShader.limits.temporary = 12;
175 This->baseShader.limits.constant_bool = 16;
176 This->baseShader.limits.constant_int = 16;
177 This->baseShader.limits.address = 1;
178 This->baseShader.limits.packed_output = 0;
179 This->baseShader.limits.sampler = 0;
180 This->baseShader.limits.label = 16;
181 FIXME("Unrecognized vertex shader version %#x\n",
182 This->baseShader.hex_version);
186 /* This is an internal function,
187 * used to create fake semantics for shaders
188 * that don't have them - d3d8 shaders where the declaration
189 * stores the register for each input
/* Store a synthesized input semantic in This->semantics_in[regnum].
 *
 * This      - shader whose semantics_in table is written
 * regnum    - register number; encoded into the register token and also used
 *             as the index into semantics_in
 * usage     - declaration usage, shifted into the usage token
 * usage_idx - usage index, shifted into the usage token
 *
 * NOTE(review): regnum is used unchecked as an array index, so callers are
 * presumably expected to pass a value below the size of semantics_in.
 * NOTE(review): 0x1 << 31 shifts a signed int into its sign bit; 0x1u << 31
 * would express the same bit pattern without relying on that. */
191 static void vshader_set_input(
192 IWineD3DVertexShaderImpl* This,
194 BYTE usage, BYTE usage_idx) {
196 /* Fake usage: set reserved bit, usage, usage_idx */
197 DWORD usage_token = (0x1 << 31) |
198 (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
200 /* Fake register; set reserved bit, regnum, type: input, wmask: all */
201 DWORD reg_token = (0x1 << 31) |
202 WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
/* Both tokens are stored in the slot that belongs to this register. */
204 This->semantics_in[regnum].usage = usage_token;
205 This->semantics_in[regnum].reg = reg_token;
/* Decide whether two (usage, usage index) pairs refer to the same input.
 *
 * The indices must be equal. The usages match when they are equal, or when
 * one is WINED3DDECLUSAGE_POSITION and the other is
 * WINED3DDECLUSAGE_POSITIONT (in either order). */
208 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
209 if (usage_idx1 != usage_idx2) return FALSE;
210 if (usage1 == usage2) return TRUE;
/* POSITION and POSITIONT are treated as interchangeable. */
211 if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
212 if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
/* Look up the input register that carries a given usage / usage index.
 *
 * iface         - the vertex shader to search
 * usage_req     - requested declaration usage
 * usage_idx_req - requested usage index
 * regnum        - output parameter; presumably receives the index of the
 *                 matching semantics_in entry (confirm against the match branch)
 *
 * Walks semantics_in[0 .. MAX_ATTRIBS - 1], skipping entries whose usage
 * token is zero, and compares each decoded usage with match_usage(). */
217 BOOL vshader_get_input(
218 IWineD3DVertexShader* iface,
219 BYTE usage_req, BYTE usage_idx_req,
220 unsigned int* regnum) {
222 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
225 for (i = 0; i < MAX_ATTRIBS; i++) {
/* Split the stored token into its usage and usage index fields. */
226 DWORD usage_token = This->semantics_in[i].usage;
227 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
228 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
/* A zero token is never considered a match. */
230 if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
/* Check whether input register regnum is listed in the shader's
 * swizzled_attribs array.
 *
 * The usage and usage index are decoded from semantics_in[regnum] and
 * compared against the first num_swizzled_attribs entries of
 * This->swizzled_attribs. Presumably TRUE is returned on a hit and FALSE
 * otherwise (confirm against the branch body).
 *
 * NOTE(review): regnum indexes semantics_in without a bounds check. */
238 BOOL vshader_input_is_color(
239 IWineD3DVertexShader* iface,
240 unsigned int regnum) {
242 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
244 DWORD usage_token = This->semantics_in[regnum].usage;
245 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
246 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
250 for(i = 0; i < This->num_swizzled_attribs; i++) {
251 if(This->swizzled_attribs[i].usage == usage &&
252 This->swizzled_attribs[i].idx == usage_idx) {
/* Rebuild This->swizzled_attribs for the given vertex declaration.
 *
 * declaration - vertex declaration whose swizzled_attribs list is consulted
 * This        - shader whose swizzled_attribs / num_swizzled_attribs are
 *               rewritten
 *
 * Phase 1: the old array is backed up and cleared, then every swizzled
 * attribute of the declaration that is also an input used by the shader
 * (reg_maps.attributes[j] set and semantics_in[j] decoding to the same
 * usage / index) is written to the new array.
 * Phase 2: entries of the old array that the declaration does not declare at
 * all are inserted back into the new array, at a position found by comparing
 * usage and then idx, so that their previous state is kept.
 * Finally the new array is traced and num_swizzled_attribs is updated. */
259 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
261 UINT numoldswizzles = This->num_swizzled_attribs;
262 IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
264 DWORD usage_token, usage, usage_idx;
/* Scratch copy with the same element count as This->swizzled_attribs. */
267 attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
269 /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
270 memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
/* Clear MAX_ATTRIBS entries before refilling the array. */
272 memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
/* Phase 1: pick up the declaration's swizzled attributes the shader reads. */
274 for(i = 0; i < decl->num_swizzled_attribs; i++) {
275 for(j = 0; j < MAX_ATTRIBS; j++) {
/* Inputs the shader does not use are skipped. */
277 if(!This->baseShader.reg_maps.attributes[j]) continue;
279 usage_token = This->semantics_in[j].usage;
280 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
281 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
283 if(decl->swizzled_attribs[i].usage == usage &&
284 decl->swizzled_attribs[i].idx == usage_idx) {
285 This->swizzled_attribs[num].usage = usage;
286 This->swizzled_attribs[num].idx = usage_idx;
292 /* Add previously converted attributes back in if they are not defined in the current declaration */
293 for(i = 0; i < numoldswizzles; i++) {
296 for(j = 0; j < decl->declarationWNumElements; j++) {
297 if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
298 oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
303 /* This previously converted attribute is declared in the current declaration. Either it is
304 * already in the new array, or it should not be there. Skip it
308 /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
309 * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
310 * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
311 * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
312 * stays unswizzled as well because it isn't found in the oldswizzles array
314 for(j = 0; j < num; j++) {
315 if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
316 oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
317 oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
318 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
319 sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
323 This->swizzled_attribs[j].usage = oldswizzles[i].usage;
324 This->swizzled_attribs[j].idx = oldswizzles[i].idx;
328 TRACE("New swizzled attributes array\n");
329 for(i = 0; i < num; i++) {
330 TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
331 This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
333 This->num_swizzled_attribs = num;
335 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
336 or GLSL and send it to the card */
/* iface     - the vertex shader to generate code for
 * reg_maps  - register usage maps of the shader
 * pFunction - the shader byte code
 *
 * The function first refreshes the shader's swizzled attribute list from the
 * vertex declaration currently set in the device's state block, then hands a
 * freshly allocated, zero-initialised SHADER_PGMSIZE buffer to the device's
 * shader backend (shader_generate_vshader) and frees the buffer afterwards.
 *
 * NOTE(review): the HeapAlloc() result is passed on without a visible NULL
 * check - confirm the backend tolerates a NULL buffer or add a check. */
337 static VOID IWineD3DVertexShaderImpl_GenerateShader(
338 IWineD3DVertexShader *iface,
339 shader_reg_maps* reg_maps,
340 CONST DWORD *pFunction) {
342 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
343 IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
344 SHADER_BUFFER buffer;
/* The D3DCOLOR swizzle list depends on the declaration bound right now. */
346 find_swizzled_attribs(decl, This);
348 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
349 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
350 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
351 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
352 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
353 This->fixupVertexBufferSize = PGMSIZE;
354 This->fixupVertexBuffer[0] = 0;
356 buffer.buffer = This->device->fixupVertexBuffer;
/* Active path: a private, zeroed buffer for the generated program text. */
358 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
362 buffer.newline = TRUE;
/* The selected backend does the actual code generation. */
364 ((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_vshader(iface, &buffer);
366 #if 1 /* if we're using the data buffer of device then we don't need to free it */
367 HeapFree(GetProcessHeap(), 0, buffer.buffer);
371 /* *******************************************
372 IWineD3DVertexShader IUnknown parts follow
373 ******************************************* */
/* IUnknown::QueryInterface - forwarded unchanged to the base shader
 * implementation, whose result is returned. */
374 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
375 return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
/* IUnknown::AddRef - forwarded unchanged to the base shader implementation,
 * whose result is returned. */
378 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
379 return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
/* IUnknown::Release - forwarded unchanged to the base shader implementation,
 * whose result is returned. */
382 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
383 return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
386 /* *******************************************
387 IWineD3DVertexShader IWineD3DVertexShader parts follow
388 ******************************************* */
/* Return the shader's parent object through *parent.
 *
 * A reference is added on the returned object with IUnknown_AddRef(), so the
 * caller is expected to release it.
 * NOTE(review): parent and This->parent are used without a NULL check. */
390 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
391 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
393 *parent = This->parent;
394 IUnknown_AddRef(*parent);
395 TRACE("(%p) : returning %p\n", This, *parent);
/* Return the device that owns this shader through *pDevice.
 *
 * A reference is added on the device with IWineD3DDevice_AddRef() before it
 * is handed out, so the caller is expected to release it. */
399 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
400 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
401 IWineD3DDevice_AddRef(This->baseShader.device);
402 *pDevice = This->baseShader.device;
403 TRACE("(%p) returning %p\n", This, *pDevice);
/* Copy the shader byte code to the caller, or report its size.
 *
 * impl        - the vertex shader
 * pData       - destination buffer for the byte code
 * pSizeOfData - in: size of pData in bytes; also used to report
 *               baseShader.functionLength back to the caller
 *
 * Returns WINED3DERR_INVALIDCALL when *pSizeOfData is smaller than the
 * stored function length. When no function is stored, NULL is written
 * through pData; otherwise functionLength bytes are copied into pData.
 *
 * NOTE(review): the store to *pSizeOfData is presumably the size-query path
 * taken when pData is NULL - confirm the guarding condition. */
407 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
408 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
409 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
/* Report the required size to the caller. */
412 *pSizeOfData = This->baseShader.functionLength;
415 if (*pSizeOfData < This->baseShader.functionLength) {
416 /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
417 * than the required size we should write the required size and
418 * return D3DERR_MOREDATA. That's not actually true. */
419 return WINED3DERR_INVALIDCALL;
421 if (NULL == This->baseShader.function) { /* no function defined */
422 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
/* pData is treated as a pointer to a pointer here and set to NULL. */
423 (*(DWORD **) pData) = NULL;
425 if(This->baseShader.functionLength == 0){
428 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
429 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
434 /* Note that for vertex shaders CompileShader isn't called until the
435 * shader is first used. The reason for this is that we need the vertex
436 * declaration the shader will be used with in order to determine if
437 * the data in a register is of type D3DCOLOR, and needs swizzling. */
/* Attach byte code to the shader and analyse it.
 *
 * iface     - the vertex shader
 * pFunction - shader byte code; copied, so the caller keeps ownership of its
 *             buffer. May be NULL, in which case no function is stored.
 *
 * Steps: trace the byte code, set the version dependent limits, reset the
 * immediate constant lists, collect register usage and semantics with
 * shader_get_registers_used(), record the selected shader mode, choose a
 * relative addressing offset for the ARB backend, and finally store a heap
 * copy of the byte code in baseShader.function.
 *
 * Returns the error from shader_get_registers_used() if it fails, or
 * E_OUTOFMEMORY if the byte code copy cannot be allocated. */
438 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
440 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
441 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
443 shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
445 TRACE("(%p) : pFunction %p\n", iface, pFunction);
447 /* First pass: trace shader */
448 shader_trace_init((IWineD3DBaseShader*) This, pFunction);
449 vshader_set_limits(This);
451 /* Initialize immediate constant lists */
452 list_init(&This->baseShader.constantsF);
453 list_init(&This->baseShader.constantsB);
454 list_init(&This->baseShader.constantsI);
456 /* Second pass: figure out registers used, semantics, etc.. */
/* min starts at the constant limit and max at 0, i.e. min > max means that
 * no relative offset has been recorded. */
457 This->min_rel_offset = GL_LIMITS(vshader_constantsF);
458 This->max_rel_offset = 0;
459 memset(reg_maps, 0, sizeof(shader_reg_maps));
460 hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
461 This->semantics_in, This->semantics_out, pFunction, NULL);
462 if (hr != WINED3D_OK) return hr;
464 This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
/* Only for the ARB backend, when gl_info reports an offset limit and at
 * least one relative offset was seen: pick rel_offset from the observed
 * min/max range. */
466 if(deviceImpl->vs_selected_mode == SHADER_ARB &&
467 (GLINFO_LOCATION).arb_vs_offset_limit &&
468 This->min_rel_offset <= This->max_rel_offset) {
/* A span above 127 is only reported; no rel_offset is assigned in this branch. */
470 if(This->max_rel_offset - This->min_rel_offset > 127) {
471 FIXME("The difference between the minimum and maximum relative offset is > 127\n");
472 FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
473 FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
474 } else if(This->max_rel_offset - This->min_rel_offset > 63) {
475 This->rel_offset = This->min_rel_offset + 63;
476 } else if(This->max_rel_offset > 63) {
477 This->rel_offset = This->min_rel_offset;
479 This->rel_offset = 0;
/* Local float constants are only loaded when the shader uses relative
 * constant addressing and actually defines float constants. */
482 This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
484 /* copy the function ... because it will certainly be released by application */
485 if (NULL != pFunction) {
488 function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
489 if (!function) return E_OUTOFMEMORY;
490 memcpy(function, pFunction, This->baseShader.functionLength);
491 This->baseShader.function = function;
493 This->baseShader.function = NULL;
499 /* Preload semantics for d3d8 shaders */
/* Create input semantics from a vertex declaration.
 *
 * iface              - the vertex shader to receive the semantics
 * vertex_declaration - declaration whose elements are converted
 *
 * Every element except the last one is passed to vshader_set_input() with
 * its Reg, Usage and UsageIndex.
 *
 * NOTE(review): the loop bound is declarationWNumElements - 1; if that count
 * is unsigned and could be 0 the bound would wrap - confirm the declaration
 * always carries at least one element. */
500 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
501 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
502 IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
505 for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
506 WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
507 vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
511 /* Set local constants for d3d8 shaders */
/* Add float constants to the shader's local constant list.
 *
 * iface     - the vertex shader
 * start_idx - index of the first constant
 * src_data  - source values, 4 floats per constant
 * count     - number of constants in src_data
 *
 * The range is clamped to GL_LIMITS(vshader_constantsF) with a WARN. For each
 * remaining index a local_constant is allocated, its value is copied from
 * src_data and the entry is added to the head of baseShader.constantsF.
 * Returns E_OUTOFMEMORY if an allocation fails; entries added before the
 * failure stay in the list.
 *
 * NOTE(review): start_idx + count is not checked for wrap-around.
 * NOTE(review): the function name is spelled "IWIne..." (capital I); the
 * vtable refers to it with the same spelling. */
512 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
513 UINT start_idx, const float *src_data, UINT count) {
514 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
517 TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
/* Clamp the end of the range to the number of supported float constants. */
519 end_idx = start_idx + count;
520 if (end_idx > GL_LIMITS(vshader_constantsF)) {
521 WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
522 end_idx = GL_LIMITS(vshader_constantsF);
525 for (i = start_idx; i < end_idx; ++i) {
526 local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
527 if (!lconst) return E_OUTOFMEMORY;
530 memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
531 list_add_head(&This->baseShader.constantsF, &lconst->entry);
/* Detailed check whether the D3DCOLOR state of any input the shader uses
 * differs between the shader's swizzled_attribs and the declaration.
 *
 * This  - shader holding the swizzled attribute list it was compiled with
 * vdecl - vertex declaration to compare against
 *
 * For every declaration element on a regular stream with a used type, the
 * shader inputs marked in reg_maps.attributes are searched for the same
 * usage / usage index. For such an input the shader's swizzled list is
 * searched, and a mismatch is traced when the element is D3DCOLOR but not in
 * the list, or is in the list but no longer D3DCOLOR. Presumably the
 * function returns TRUE on the first mismatch and FALSE otherwise (confirm
 * against the branch bodies). */
537 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
545 for(i = 0; i < vdecl->declarationWNumElements; i++) {
546 /* Ignore tessellated streams and the termination entry(position0, stream 255, unused) */
547 if(vdecl->pDeclarationWine[i].Stream >= MAX_STREAMS ||
548 vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_UNUSED) continue;
550 for(j = 0; j < MAX_ATTRIBS; j++) {
/* Only inputs the shader actually reads are of interest. */
551 if(!This->baseShader.reg_maps.attributes[j]) continue;
553 usage_token = This->semantics_in[j].usage;
554 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
555 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
/* Tests whether this input is a different attribute than the element. */
557 if(vdecl->pDeclarationWine[i].Usage != usage ||
558 vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
/* Was this attribute swizzled when the shader was last compiled? */
563 for(k = 0; k < This->num_swizzled_attribs; k++) {
564 if(This->swizzled_attribs[k].usage == usage &&
565 This->swizzled_attribs[k].idx == usage_idx) {
569 if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
570 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
571 debug_d3ddeclusage(usage), usage_idx);
574 if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
575 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
576 debug_d3ddeclusage(usage), usage_idx);
/* Compile the shader for the vertex declaration currently set on the device,
 * or determine that the existing compilation can be kept.
 *
 * If the shader is already compiled, its swizzled attribute list is compared
 * with the one of the current declaration: first by count and memcmp(), and
 * only if those differ by the more expensive swizzled_attribs_differ(). When
 * a recompile is needed, recompile_count is incremented (a FIXME is logged
 * once it has reached 50) and the backend's shader_destroy() is called for
 * the old program. The hardware program is then generated with
 * IWineD3DVertexShaderImpl_GenerateShader() and is_compiled is set.
 *
 * NOTE(review): presumably a shader without byte code is just marked as
 * compiled without generating anything - confirm against the full function. */
584 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
585 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
586 IWineD3DVertexDeclarationImpl *vdecl;
587 CONST DWORD *function = This->baseShader.function;
588 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
590 TRACE("(%p) : function %p\n", iface, function);
592 /* We're already compiled. */
593 if (This->baseShader.is_compiled) {
594 vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
/* Cheap comparison first: element count, then the raw array contents. */
596 if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
597 memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
599 /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
600 * we have to recompile, but we have to take a deeper look at see if the attribs that differ
601 * are declared in the decl and used in the shader
603 if(swizzled_attribs_differ(This, vdecl)) {
604 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
607 WARN("Swizzled attribute validation required an expensive comparison\n");
613 if(This->recompile_count < 50) {
614 This->recompile_count++;
616 FIXME("Vertexshader %p recompiled more than 50 times\n", This);
619 deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
622 /* We don't need to compile */
624 This->baseShader.is_compiled = TRUE;
628 /* Generate the HW shader */
629 TRACE("(%p) : Generating hardware program\n", This);
630 IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
632 This->baseShader.is_compiled = TRUE;
/* Method table of the vertex shader object, wiring the interface methods to
 * the implementations in this file. The initializer is positional, so the
 * entries have to stay in the order of the members of
 * IWineD3DVertexShaderVtbl (declared elsewhere).
 * NOTE(review): the section labels below do not line up with every entry
 * (e.g. GetDevice is listed under "IWineD3DVertexShader methods") - verify
 * against the interface declaration before reordering anything. */
637 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
639 /*** IUnknown methods ***/
640 IWineD3DVertexShaderImpl_QueryInterface,
641 IWineD3DVertexShaderImpl_AddRef,
642 IWineD3DVertexShaderImpl_Release,
643 /*** IWineD3DBase methods ***/
644 IWineD3DVertexShaderImpl_GetParent,
645 /*** IWineD3DBaseShader methods ***/
646 IWineD3DVertexShaderImpl_SetFunction,
647 IWineD3DVertexShaderImpl_CompileShader,
648 /*** IWineD3DVertexShader methods ***/
649 IWineD3DVertexShaderImpl_GetDevice,
650 IWineD3DVertexShaderImpl_GetFunction,
651 IWineD3DVertexShaderImpl_FakeSemantics,
652 IWIneD3DVertexShaderImpl_SetLocalConstantsF