2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2004 Christian Costa
7 * Copyright 2005 Oliver Stieber
8 * Copyright 2006 Ivan Gyurdiev
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "wined3d_private.h"
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
/* Shader debugging - Change the following line to enable debugging of software
   vertex shaders. When disabled, both tracing macros expand to nothing. */
#if 0 /* Must not be 1 in cvs version */
# define VSTRACE(A) TRACE A
# define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
#else
# define VSTRACE(A)
# define TRACE_VSVECTOR(name)
#endif
/**
 * Reference material:
 *
 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
 *  http://developer.nvidia.com/view.asp?IO=vstovp
 *
 * NVIDIA: Memory Management with VAR
 *  http://developer.nvidia.com/view.asp?IO=var_memory_management
 */
/* TODO: Vertex and pixel shaders are almost identical. The only differences are the way some of the
   data is looked up and which data is available, i.e. some instructions are only valid for pixel
   shaders and some only for vertex shaders. Because of this, the bulk of the software pipeline can be
   shared between pixel and vertex shaders, and it would not be surprising if programs could be
   cross-compiled using a large body of shared code. */
57 #define GLNAME_REQUIRE_GLSL ((const char *)1)
59 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
60 /* This table is not order or position dependent. */
63 {WINED3DSIO_NOP, "nop", "NOP", 0, 0, vshader_hw_map2gl, NULL, 0, 0},
64 {WINED3DSIO_MOV, "mov", "MOV", 1, 2, vshader_hw_map2gl, shader_glsl_mov, 0, 0},
65 {WINED3DSIO_MOVA, "mova", NULL, 1, 2, vshader_hw_map2gl, shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
66 {WINED3DSIO_ADD, "add", "ADD", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
67 {WINED3DSIO_SUB, "sub", "SUB", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
68 {WINED3DSIO_MAD, "mad", "MAD", 1, 4, vshader_hw_map2gl, shader_glsl_mad, 0, 0},
69 {WINED3DSIO_MUL, "mul", "MUL", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
70 {WINED3DSIO_RCP, "rcp", "RCP", 1, 2, vshader_hw_rsq_rcp, shader_glsl_rcp, 0, 0},
71 {WINED3DSIO_RSQ, "rsq", "RSQ", 1, 2, vshader_hw_rsq_rcp, shader_glsl_rsq, 0, 0},
72 {WINED3DSIO_DP3, "dp3", "DP3", 1, 3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
73 {WINED3DSIO_DP4, "dp4", "DP4", 1, 3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
74 {WINED3DSIO_MIN, "min", "MIN", 1, 3, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
75 {WINED3DSIO_MAX, "max", "MAX", 1, 3, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
76 {WINED3DSIO_SLT, "slt", "SLT", 1, 3, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
77 {WINED3DSIO_SGE, "sge", "SGE", 1, 3, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
78 {WINED3DSIO_ABS, "abs", "ABS", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
79 {WINED3DSIO_EXP, "exp", "EX2", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
80 {WINED3DSIO_LOG, "log", "LG2", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
81 {WINED3DSIO_EXPP, "expp", "EXP", 1, 2, vshader_hw_map2gl, shader_glsl_expp, 0, 0},
82 {WINED3DSIO_LOGP, "logp", "LOG", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
83 {WINED3DSIO_LIT, "lit", "LIT", 1, 2, vshader_hw_map2gl, shader_glsl_lit, 0, 0},
84 {WINED3DSIO_DST, "dst", "DST", 1, 3, vshader_hw_map2gl, shader_glsl_dst, 0, 0},
85 {WINED3DSIO_LRP, "lrp", "LRP", 1, 4, NULL, shader_glsl_lrp, 0, 0},
86 {WINED3DSIO_FRC, "frc", "FRC", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
87 {WINED3DSIO_POW, "pow", "POW", 1, 3, vshader_hw_map2gl, shader_glsl_pow, 0, 0},
88 {WINED3DSIO_CRS, "crs", "XPD", 1, 3, vshader_hw_map2gl, shader_glsl_cross, 0, 0},
89 /* TODO: sng can possibly be performed a s
92 {WINED3DSIO_SGN, "sgn", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
93 {WINED3DSIO_NRM, "nrm", NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
94 {WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
95 {WINED3DSIO_SINCOS, "sincos", "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
97 {WINED3DSIO_M4x4, "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
98 {WINED3DSIO_M4x3, "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
99 {WINED3DSIO_M3x4, "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
100 {WINED3DSIO_M3x3, "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
101 {WINED3DSIO_M3x2, "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
102 /* Declare registers */
103 {WINED3DSIO_DCL, "dcl", NULL, 0, 2, NULL, NULL, 0, 0},
104 /* Constant definitions */
105 {WINED3DSIO_DEF, "def", NULL, 1, 5, NULL, NULL, 0, 0},
106 {WINED3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
107 {WINED3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
108 /* Flow control - requires GLSL or software shaders */
109 {WINED3DSIO_REP , "rep", NULL, 0, 1, NULL, shader_glsl_rep, WINED3DVS_VERSION(2,0), -1},
110 {WINED3DSIO_ENDREP, "endrep", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
111 {WINED3DSIO_IF, "if", NULL, 0, 1, NULL, shader_glsl_if, WINED3DVS_VERSION(2,0), -1},
112 {WINED3DSIO_IFC, "ifc", NULL, 0, 2, NULL, shader_glsl_ifc, WINED3DVS_VERSION(2,1), -1},
113 {WINED3DSIO_ELSE, "else", NULL, 0, 0, NULL, shader_glsl_else, WINED3DVS_VERSION(2,0), -1},
114 {WINED3DSIO_ENDIF, "endif", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
115 {WINED3DSIO_BREAK, "break", NULL, 0, 0, NULL, shader_glsl_break, WINED3DVS_VERSION(2,1), -1},
116 {WINED3DSIO_BREAKC, "breakc", NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
117 {WINED3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
118 {WINED3DSIO_CALL, "call", NULL, 0, 1, NULL, shader_glsl_call, WINED3DVS_VERSION(2,0), -1},
119 {WINED3DSIO_CALLNZ, "callnz", NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
120 {WINED3DSIO_LOOP, "loop", NULL, 0, 2, NULL, shader_glsl_loop, WINED3DVS_VERSION(2,0), -1},
121 {WINED3DSIO_RET, "ret", NULL, 0, 0, NULL, NULL, WINED3DVS_VERSION(2,0), -1},
122 {WINED3DSIO_ENDLOOP,"endloop", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
123 {WINED3DSIO_LABEL, "label", NULL, 0, 1, NULL, shader_glsl_label, WINED3DVS_VERSION(2,0), -1},
125 {WINED3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
126 {WINED3DSIO_TEXLDL, "texldl", NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
127 {0, NULL, NULL, 0, 0, NULL, NULL, 0, 0}
130 static void vshader_set_limits(
131 IWineD3DVertexShaderImpl *This) {
133 This->baseShader.limits.texcoord = 0;
134 This->baseShader.limits.attributes = 16;
135 This->baseShader.limits.packed_input = 0;
137 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
138 This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
140 switch (This->baseShader.hex_version) {
141 case WINED3DVS_VERSION(1,0):
142 case WINED3DVS_VERSION(1,1):
143 This->baseShader.limits.temporary = 12;
144 This->baseShader.limits.constant_bool = 0;
145 This->baseShader.limits.constant_int = 0;
146 This->baseShader.limits.address = 1;
147 This->baseShader.limits.packed_output = 0;
148 This->baseShader.limits.sampler = 0;
149 This->baseShader.limits.label = 0;
152 case WINED3DVS_VERSION(2,0):
153 case WINED3DVS_VERSION(2,1):
154 This->baseShader.limits.temporary = 12;
155 This->baseShader.limits.constant_bool = 16;
156 This->baseShader.limits.constant_int = 16;
157 This->baseShader.limits.address = 1;
158 This->baseShader.limits.packed_output = 0;
159 This->baseShader.limits.sampler = 0;
160 This->baseShader.limits.label = 16;
163 case WINED3DVS_VERSION(3,0):
164 This->baseShader.limits.temporary = 32;
165 This->baseShader.limits.constant_bool = 32;
166 This->baseShader.limits.constant_int = 32;
167 This->baseShader.limits.address = 1;
168 This->baseShader.limits.packed_output = 12;
169 This->baseShader.limits.sampler = 4;
170 This->baseShader.limits.label = 16; /* FIXME: 2048 */
173 default: This->baseShader.limits.temporary = 12;
174 This->baseShader.limits.constant_bool = 16;
175 This->baseShader.limits.constant_int = 16;
176 This->baseShader.limits.address = 1;
177 This->baseShader.limits.packed_output = 0;
178 This->baseShader.limits.sampler = 0;
179 This->baseShader.limits.label = 16;
180 FIXME("Unrecognized vertex shader version %#x\n",
181 This->baseShader.hex_version);
185 /* This is an internal function,
186 * used to create fake semantics for shaders
187 * that don't have them - d3d8 shaders where the declaration
188 * stores the register for each input
190 static void vshader_set_input(
191 IWineD3DVertexShaderImpl* This,
193 BYTE usage, BYTE usage_idx) {
195 /* Fake usage: set reserved bit, usage, usage_idx */
196 DWORD usage_token = (0x1 << 31) |
197 (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
199 /* Fake register; set reserved bit, regnum, type: input, wmask: all */
200 DWORD reg_token = (0x1 << 31) |
201 WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
203 This->semantics_in[regnum].usage = usage_token;
204 This->semantics_in[regnum].reg = reg_token;
207 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
208 if (usage_idx1 != usage_idx2) return FALSE;
209 if (usage1 == usage2) return TRUE;
210 if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
211 if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
216 BOOL vshader_get_input(
217 IWineD3DVertexShader* iface,
218 BYTE usage_req, BYTE usage_idx_req,
219 unsigned int* regnum) {
221 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
224 for (i = 0; i < MAX_ATTRIBS; i++) {
225 DWORD usage_token = This->semantics_in[i].usage;
226 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
227 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
229 if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
237 BOOL vshader_input_is_color(
238 IWineD3DVertexShader* iface,
239 unsigned int regnum) {
241 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
243 DWORD usage_token = This->semantics_in[regnum].usage;
244 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
245 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
249 for(i = 0; i < This->num_swizzled_attribs; i++) {
250 if(This->swizzled_attribs[i].usage == usage &&
251 This->swizzled_attribs[i].idx == usage_idx) {
258 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
260 UINT numoldswizzles = This->num_swizzled_attribs;
261 IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
263 DWORD usage_token, usage, usage_idx;
266 attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
268 /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
269 memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
271 memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
273 for(i = 0; i < decl->num_swizzled_attribs; i++) {
274 for(j = 0; j < MAX_ATTRIBS; j++) {
276 if(!This->baseShader.reg_maps.attributes[j]) continue;
278 usage_token = This->semantics_in[j].usage;
279 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
280 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
282 if(decl->swizzled_attribs[i].usage == usage &&
283 decl->swizzled_attribs[i].idx == usage_idx) {
284 This->swizzled_attribs[num].usage = usage;
285 This->swizzled_attribs[num].idx = usage_idx;
291 /* Add previously converted attributes back in if they are not defined in the current declaration */
292 for(i = 0; i < numoldswizzles; i++) {
295 for(j = 0; j < decl->declarationWNumElements; j++) {
296 if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
297 oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
302 /* This previously converted attribute is declared in the current declaration. Either it is
303 * already in the new array, or it should not be there. Skip it
307 /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
308 * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
309 * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
310 * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
311 * stays unswizzled as well because it isn't found in the oldswizzles array
313 for(j = 0; j < num; j++) {
314 if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
315 oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
316 oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
317 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
318 sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
322 This->swizzled_attribs[j].usage = oldswizzles[i].usage;
323 This->swizzled_attribs[j].idx = oldswizzles[i].idx;
327 TRACE("New swizzled attributes array\n");
328 for(i = 0; i < num; i++) {
329 TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
330 This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
332 This->num_swizzled_attribs = num;
334 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
335 or GLSL and send it to the card */
336 static VOID IWineD3DVertexShaderImpl_GenerateShader(
337 IWineD3DVertexShader *iface,
338 shader_reg_maps* reg_maps,
339 CONST DWORD *pFunction) {
341 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
342 IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
343 SHADER_BUFFER buffer;
345 find_swizzled_attribs(decl, This);
347 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
348 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
349 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
350 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
351 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
352 This->fixupVertexBufferSize = PGMSIZE;
353 This->fixupVertexBuffer[0] = 0;
355 buffer.buffer = This->device->fixupVertexBuffer;
357 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
361 buffer.newline = TRUE;
363 if (This->baseShader.shader_mode == SHADER_GLSL) {
365 /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
366 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
368 /* Base Declarations */
369 shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
371 /* Base Shader Body */
372 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
374 /* Unpack 3.0 outputs */
375 if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0)) {
376 shader_addline(&buffer, "order_ps_input(OUT);\n");
378 shader_addline(&buffer, "order_ps_input();\n");
381 /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
383 shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
385 /* Write the final position.
387 * OpenGL coordinates specify the center of the pixel while d3d coords specify
388 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
389 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
390 * contains 1.0 to allow a mad.
392 shader_addline(&buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n");
393 shader_addline(&buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n");
395 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
397 * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However, shaders are run
398 * before the homogeneous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
399 * which is the same as z = z / 2 - w.
401 shader_addline(&buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
403 shader_addline(&buffer, "}\n");
405 TRACE("Compiling shader object %u\n", shader_obj);
406 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
407 GL_EXTCALL(glCompileShaderARB(shader_obj));
408 print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
410 /* Store the shader object */
411 This->baseShader.prgId = shader_obj;
413 } else if (This->baseShader.shader_mode == SHADER_ARB) {
415 /* Create the hw ARB shader */
416 shader_addline(&buffer, "!!ARBvp1.0\n");
417 shader_addline(&buffer, "PARAM helper_const = { 2.0, -1.0, %d.0, 0.0 };\n", This->rel_offset);
419 /* Mesa supports only 95 constants */
420 if (GL_VEND(MESA) || GL_VEND(WINE))
421 This->baseShader.limits.constant_float =
422 min(95, This->baseShader.limits.constant_float);
424 shader_addline(&buffer, "TEMP TMP;\n");
426 /* Base Declarations */
427 shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
429 /* We need a constant to fixup the final position */
430 shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
432 if((GLINFO_LOCATION).set_texcoord_w) {
434 for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) {
435 if(This->baseShader.reg_maps.texcoord_mask[i] != 0 &&
436 This->baseShader.reg_maps.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) {
437 shader_addline(&buffer, "MOV result.texcoord[%u].w, -helper_const.y;\n", i);
442 /* Base Shader Body */
443 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
445 /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
447 shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
449 /* Write the final position.
451 * OpenGL coordinates specify the center of the pixel while d3d coords specify
452 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
453 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
454 * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
456 shader_addline(&buffer, "MUL TMP, posFixup, TMP_OUT.w;\n");
457 shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, TMP.z;\n");
458 shader_addline(&buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TMP.w;\n");
460 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
461 * and the glsl equivalent
463 shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, helper_const.x, -TMP_OUT.w;\n");
465 shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
467 shader_addline(&buffer, "END\n");
469 /* TODO: change to resource.glObjectHandle or something like that */
470 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
472 TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
473 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
475 TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
476 /* Create the program and check for errors */
477 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
478 buffer.bsize, buffer.buffer));
480 if (glGetError() == GL_INVALID_OPERATION) {
482 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
483 FIXME("HW VertexShader Error at position %d: %s\n",
484 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
485 This->baseShader.prgId = -1;
489 #if 1 /* if were using the data buffer of device then we don't need to free it */
490 HeapFree(GetProcessHeap(), 0, buffer.buffer);
494 /* *******************************************
495 IWineD3DVertexShader IUnknown parts follow
496 ******************************************* */
497 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
498 return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
501 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
502 return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
505 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
506 return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
509 /* *******************************************
510 IWineD3DVertexShader IWineD3DVertexShader parts follow
511 ******************************************* */
513 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
514 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
516 *parent = This->parent;
517 IUnknown_AddRef(*parent);
518 TRACE("(%p) : returning %p\n", This, *parent);
522 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
523 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
524 IWineD3DDevice_AddRef(This->baseShader.device);
525 *pDevice = This->baseShader.device;
526 TRACE("(%p) returning %p\n", This, *pDevice);
530 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
531 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
532 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
535 *pSizeOfData = This->baseShader.functionLength;
538 if (*pSizeOfData < This->baseShader.functionLength) {
539 /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
540 * than the required size we should write the required size and
541 * return D3DERR_MOREDATA. That's not actually true. */
542 return WINED3DERR_INVALIDCALL;
544 if (NULL == This->baseShader.function) { /* no function defined */
545 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
546 (*(DWORD **) pData) = NULL;
548 if(This->baseShader.functionLength == 0){
551 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
552 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
557 /* Note that for vertex shaders CompileShader isn't called until the
558 * shader is first used. The reason for this is that we need the vertex
559 * declaration the shader will be used with in order to determine if
560 * the data in a register is of type D3DCOLOR, and needs swizzling. */
561 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
563 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
564 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
566 shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
568 TRACE("(%p) : pFunction %p\n", iface, pFunction);
570 /* First pass: trace shader */
571 shader_trace_init((IWineD3DBaseShader*) This, pFunction);
572 vshader_set_limits(This);
574 /* Initialize immediate constant lists */
575 list_init(&This->baseShader.constantsF);
576 list_init(&This->baseShader.constantsB);
577 list_init(&This->baseShader.constantsI);
579 /* Second pass: figure out registers used, semantics, etc.. */
580 This->min_rel_offset = GL_LIMITS(vshader_constantsF);
581 This->max_rel_offset = 0;
582 memset(reg_maps, 0, sizeof(shader_reg_maps));
583 hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
584 This->semantics_in, This->semantics_out, pFunction, NULL);
585 if (hr != WINED3D_OK) return hr;
587 This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
589 if(deviceImpl->vs_selected_mode == SHADER_ARB &&
590 (GLINFO_LOCATION).arb_vs_offset_limit &&
591 This->min_rel_offset <= This->max_rel_offset) {
593 if(This->max_rel_offset - This->min_rel_offset > 127) {
594 FIXME("The difference between the minimum and maximum relative offset is > 127\n");
595 FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
596 FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
597 } else if(This->max_rel_offset - This->min_rel_offset > 63) {
598 This->rel_offset = This->min_rel_offset + 63;
599 } else if(This->max_rel_offset > 63) {
600 This->rel_offset = This->min_rel_offset;
602 This->rel_offset = 0;
605 This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
607 /* copy the function ... because it will certainly be released by application */
608 if (NULL != pFunction) {
611 function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
612 if (!function) return E_OUTOFMEMORY;
613 memcpy(function, pFunction, This->baseShader.functionLength);
614 This->baseShader.function = function;
616 This->baseShader.function = NULL;
622 /* Preload semantics for d3d8 shaders */
623 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
624 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
625 IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
628 for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
629 WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
630 vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
634 /* Set local constants for d3d8 shaders */
635 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
636 UINT start_idx, const float *src_data, UINT count) {
637 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
640 TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
642 end_idx = start_idx + count;
643 if (end_idx > GL_LIMITS(vshader_constantsF)) {
644 WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
645 end_idx = GL_LIMITS(vshader_constantsF);
648 for (i = start_idx; i < end_idx; ++i) {
649 local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
650 if (!lconst) return E_OUTOFMEMORY;
653 memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
654 list_add_head(&This->baseShader.constantsF, &lconst->entry);
660 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
668 for(i = 0; i < vdecl->declarationWNumElements; i++) {
669 /* Ignore tesselated streams and the termination entry(position0, stream 255, unused) */
670 if(vdecl->pDeclarationWine[i].Stream >= MAX_STREAMS ||
671 vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_UNUSED) continue;
673 for(j = 0; j < MAX_ATTRIBS; j++) {
674 if(!This->baseShader.reg_maps.attributes[j]) continue;
676 usage_token = This->semantics_in[j].usage;
677 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
678 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
680 if(vdecl->pDeclarationWine[i].Usage != usage ||
681 vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
686 for(k = 0; k < This->num_swizzled_attribs; k++) {
687 if(This->swizzled_attribs[k].usage == usage &&
688 This->swizzled_attribs[k].idx == usage_idx) {
692 if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
693 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
694 debug_d3ddeclusage(usage), usage_idx);
697 if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
698 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
699 debug_d3ddeclusage(usage), usage_idx);
707 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
708 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
709 IWineD3DVertexDeclarationImpl *vdecl;
710 CONST DWORD *function = This->baseShader.function;
711 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
713 TRACE("(%p) : function %p\n", iface, function);
715 /* We're already compiled. */
716 if (This->baseShader.is_compiled) {
717 vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
719 if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
720 memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
722 /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
723 * we have to recompile, but we have to take a deeper look at see if the attribs that differ
724 * are declared in the decl and used in the shader
726 if(swizzled_attribs_differ(This, vdecl)) {
727 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
730 WARN("Swizzled attribute validation required an expensive comparison\n");
736 if(This->recompile_count < 50) {
737 This->recompile_count++;
739 FIXME("Vertexshader %p recompiled more than 50 times\n", This);
742 deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
745 /* We don't need to compile */
747 This->baseShader.is_compiled = TRUE;
751 /* Generate the HW shader */
752 TRACE("(%p) : Generating hardware program\n", This);
753 IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
755 This->baseShader.is_compiled = TRUE;
760 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
762 /*** IUnknown methods ***/
763 IWineD3DVertexShaderImpl_QueryInterface,
764 IWineD3DVertexShaderImpl_AddRef,
765 IWineD3DVertexShaderImpl_Release,
766 /*** IWineD3DBase methods ***/
767 IWineD3DVertexShaderImpl_GetParent,
768 /*** IWineD3DBaseShader methods ***/
769 IWineD3DVertexShaderImpl_SetFunction,
770 IWineD3DVertexShaderImpl_CompileShader,
771 /*** IWineD3DVertexShader methods ***/
772 IWineD3DVertexShaderImpl_GetDevice,
773 IWineD3DVertexShaderImpl_GetFunction,
774 IWineD3DVertexShaderImpl_FakeSemantics,
775 IWIneD3DVertexShaderImpl_SetLocalConstantsF