winealsa: Use "default" as the default card name instead of "default:0".
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24
25 #include "config.h"
26
27 #include <math.h>
28 #include <stdio.h>
29
30 #include "wined3d_private.h"
31
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
33
34 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
35
36 /* Shader debugging - Change the following line to enable debugging of software
37       vertex shaders                                                             */
38 #if 0 /* Must not be 1 in cvs version */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
41 #else
42 # define VSTRACE(A)
43 # define TRACE_VSVECTOR(name)
44 #endif
45
46 /**
47  * NVIDIA: DX8 Vertex Shader to NV Vertex Program
48  *  http://developer.nvidia.com/view.asp?IO=vstovp
49  *
50  * NVIDIA: Memory Management with VAR
51  *  http://developer.nvidia.com/view.asp?IO=var_memory_management
52  */
53
54 /* TODO: Vertex and Pixel shaders are almost identical, the only exception being the way that some of the data is looked up or the availability of some of the data i.e. some instructions are only valid for pshaders and some for vshaders
55 because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the program can be cross compiled using a large body of shared code */
56
57 #define GLNAME_REQUIRE_GLSL  ((const char *)1)
58
59 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
60     /* This table is not order or position dependent.
     *
     * NOTE(review): column meanings are inferred from the entries only; confirm
     * against the SHADER_OPCODE declaration. Each row appears to hold: opcode token,
     * D3D mnemonic, ARB mnemonic (NULL, "undefined" or GLNAME_REQUIRE_GLSL where no
     * plain ARB name is given), two small integer fields, an ARB code generator,
     * a GLSL code generator, and two shader version bounds. The final all-zero
     * row presumably terminates the table. */
61
62     /* Arithmetic */
63     {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, vshader_hw_map2gl,   NULL, 0, 0},
64     {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, vshader_hw_map2gl,   shader_glsl_mov, 0, 0},
65     {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, vshader_hw_map2gl,   shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
66     {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
67     {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
68     {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, vshader_hw_map2gl,   shader_glsl_mad, 0, 0},
69     {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
70     {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
71     {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rsq, 0, 0},
72     {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
73     {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
74     {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
75     {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
76     {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
77     {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
78     {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
79     {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
80     {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
81     {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, vshader_hw_map2gl,   shader_glsl_expp, 0, 0},
82     {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
83     {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, vshader_hw_map2gl,   shader_glsl_lit, 0, 0},
84     {WINED3DSIO_DST,    "dst",  "DST", 1, 3, vshader_hw_map2gl,   shader_glsl_dst, 0, 0},
85     {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
86     {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
87     {WINED3DSIO_POW,    "pow",  "POW", 1, 3, vshader_hw_map2gl,   shader_glsl_pow, 0, 0},
88     {WINED3DSIO_CRS,    "crs",  "XPD", 1, 3, vshader_hw_map2gl,   shader_glsl_cross, 0, 0},
89     /* TODO: sgn can possibly be performed as
90         RCP tmp, vec
91         MUL out, tmp, vec*/
92     {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
93     {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
94     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
95     {WINED3DSIO_SINCOS, "sincos",  "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
96     /* Matrix */
97     {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
98     {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
99     {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
100     {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
101     {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
102     /* Declare registers */
103     {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
104     /* Constant definitions */
105     {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
106     {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
107     {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
108     /* Flow control - requires GLSL or software shaders */
109     {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
110     {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
111     {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
112     {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
113     {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
114     {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
115     {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
116     {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
117     {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
118     {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
119     {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
120     {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
121     {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
122     {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
123     {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},
124
125     {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
126     {WINED3DSIO_TEXLDL, "texldl",   NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
127     {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
128 };
129
130 static void vshader_set_limits(
131       IWineD3DVertexShaderImpl *This) {
132
133       This->baseShader.limits.texcoord = 0;
134       This->baseShader.limits.attributes = 16;
135       This->baseShader.limits.packed_input = 0;
136
137       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
138       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
139
140       switch (This->baseShader.hex_version) {
141           case WINED3DVS_VERSION(1,0):
142           case WINED3DVS_VERSION(1,1):
143                    This->baseShader.limits.temporary = 12;
144                    This->baseShader.limits.constant_bool = 0;
145                    This->baseShader.limits.constant_int = 0;
146                    This->baseShader.limits.address = 1;
147                    This->baseShader.limits.packed_output = 0;
148                    This->baseShader.limits.sampler = 0;
149                    This->baseShader.limits.label = 0;
150                    break;
151       
152           case WINED3DVS_VERSION(2,0):
153           case WINED3DVS_VERSION(2,1):
154                    This->baseShader.limits.temporary = 12;
155                    This->baseShader.limits.constant_bool = 16;
156                    This->baseShader.limits.constant_int = 16;
157                    This->baseShader.limits.address = 1;
158                    This->baseShader.limits.packed_output = 0;
159                    This->baseShader.limits.sampler = 0;
160                    This->baseShader.limits.label = 16;
161                    break;
162
163           case WINED3DVS_VERSION(3,0):
164                    This->baseShader.limits.temporary = 32;
165                    This->baseShader.limits.constant_bool = 32;
166                    This->baseShader.limits.constant_int = 32;
167                    This->baseShader.limits.address = 1;
168                    This->baseShader.limits.packed_output = 12;
169                    This->baseShader.limits.sampler = 4;
170                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
171                    break;
172
173           default: This->baseShader.limits.temporary = 12;
174                    This->baseShader.limits.constant_bool = 16;
175                    This->baseShader.limits.constant_int = 16;
176                    This->baseShader.limits.address = 1;
177                    This->baseShader.limits.packed_output = 0;
178                    This->baseShader.limits.sampler = 0;
179                    This->baseShader.limits.label = 16;
180                    FIXME("Unrecognized vertex shader version %#x\n",
181                        This->baseShader.hex_version);
182       }
183 }
184
185 /* This is an internal function,
186  * used to create fake semantics for shaders
187  * that don't have them - d3d8 shaders where the declaration
188  * stores the register for each input
189  */
190 static void vshader_set_input(
191     IWineD3DVertexShaderImpl* This,
192     unsigned int regnum,
193     BYTE usage, BYTE usage_idx) {
194
195     /* Fake usage: set reserved bit, usage, usage_idx */
196     DWORD usage_token = (0x1 << 31) |
197         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
198
199     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
200     DWORD reg_token = (0x1 << 31) |
201         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
202
203     This->semantics_in[regnum].usage = usage_token;
204     This->semantics_in[regnum].reg = reg_token;
205 }
206
207 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
208     if (usage_idx1 != usage_idx2) return FALSE;
209     if (usage1 == usage2) return TRUE;
210     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
211     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
212
213     return FALSE;
214 }
215
216 BOOL vshader_get_input(
217     IWineD3DVertexShader* iface,
218     BYTE usage_req, BYTE usage_idx_req,
219     unsigned int* regnum) {
220
221     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
222     int i;
223
224     for (i = 0; i < MAX_ATTRIBS; i++) {
225         DWORD usage_token = This->semantics_in[i].usage;
226         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
227         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
228
229         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
230             *regnum = i;
231             return TRUE;
232         }
233     }
234     return FALSE;
235 }
236
237 BOOL vshader_input_is_color(
238     IWineD3DVertexShader* iface,
239     unsigned int regnum) {
240
241     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
242
243     DWORD usage_token = This->semantics_in[regnum].usage;
244     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
245     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
246
247     int i;
248
249     for(i = 0; i < This->num_swizzled_attribs; i++) {
250         if(This->swizzled_attribs[i].usage == usage &&
251            This->swizzled_attribs[i].idx == usage_idx) {
252             return TRUE;
253         }
254     }
255     return FALSE;
256 }
257
258 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
259     UINT num = 0, i, j;
260     UINT numoldswizzles = This->num_swizzled_attribs;
261     IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
262
263     DWORD usage_token, usage, usage_idx;
264     BOOL found;
265
266     attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
267
268     /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
269     memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
270
271     memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
272
273     for(i = 0; i < decl->num_swizzled_attribs; i++) {
274         for(j = 0; j < MAX_ATTRIBS; j++) {
275
276             if(!This->baseShader.reg_maps.attributes[j]) continue;
277
278             usage_token = This->semantics_in[j].usage;
279             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
280             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
281
282             if(decl->swizzled_attribs[i].usage == usage &&
283                decl->swizzled_attribs[i].idx == usage_idx) {
284                 This->swizzled_attribs[num].usage = usage;
285                 This->swizzled_attribs[num].idx = usage_idx;
286                 num++;
287             }
288         }
289     }
290
291     /* Add previously converted attributes back in if they are not defined in the current declaration */
292     for(i = 0; i < numoldswizzles; i++) {
293
294         found = FALSE;
295         for(j = 0; j < decl->declarationWNumElements; j++) {
296             if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
297                oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
298                 found = TRUE;
299             }
300         }
301         if(found) {
302             /* This previously converted attribute is declared in the current declaration. Either it is
303              * already in the new array, or it should not be there. Skip it
304              */
305             continue;
306         }
307         /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
308          * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
309          * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
310          * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
311          * stays unswizzled as well because it isn't found in the oldswizzles array
312          */
313         for(j = 0; j < num; j++) {
314             if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
315                oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
316                oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
317                 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
318                          sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
319                 break;
320             }
321         }
322         This->swizzled_attribs[j].usage = oldswizzles[i].usage;
323         This->swizzled_attribs[j].idx = oldswizzles[i].idx;
324         num++;
325     }
326
327     TRACE("New swizzled attributes array\n");
328     for(i = 0; i < num; i++) {
329         TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
330               This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
331     }
332     This->num_swizzled_attribs = num;
333 }
334 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
335     or GLSL and send it to the card */
336 static VOID IWineD3DVertexShaderImpl_GenerateShader(
337     IWineD3DVertexShader *iface,
338     shader_reg_maps* reg_maps,
339     CONST DWORD *pFunction) {
340
341     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
342     IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
343     SHADER_BUFFER buffer;
344
345     find_swizzled_attribs(decl, This);
346
347 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
348         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
349     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
350         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
351         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
352         This->fixupVertexBufferSize = PGMSIZE;
353         This->fixupVertexBuffer[0] = 0;
354     }
355     buffer.buffer = This->device->fixupVertexBuffer;
356 #else
357     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
358 #endif
359     buffer.bsize = 0;
360     buffer.lineNo = 0;
361     buffer.newline = TRUE;
362
363     if (This->baseShader.shader_mode == SHADER_GLSL) {
364
365         /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
366         GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
367
368         /* Base Declarations */
369         shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
370
371         /* Base Shader Body */
372         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
373
374         /* Unpack 3.0 outputs */
375         if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0)) {
376             shader_addline(&buffer, "order_ps_input(OUT);\n");
377         } else {
378             shader_addline(&buffer, "order_ps_input();\n");
379         }
380
381         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
382         if (!reg_maps->fog)
383             shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
384
385         /* Write the final position.
386          *
387          * OpenGL coordinates specify the center of the pixel while d3d coords specify
388          * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
389          * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
390          * contains 1.0 to allow a mad.
391          */
392         shader_addline(&buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n");
393         shader_addline(&buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n");
394
395         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
396          *
397          * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However, shaders are run
398          * before the homogeneous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
399          * which is the same as z = z / 2 - w.
400          */
401         shader_addline(&buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
402
403         shader_addline(&buffer, "}\n");
404
405         TRACE("Compiling shader object %u\n", shader_obj);
406         GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
407         GL_EXTCALL(glCompileShaderARB(shader_obj));
408         print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
409
410         /* Store the shader object */
411         This->baseShader.prgId = shader_obj;
412
413     } else if (This->baseShader.shader_mode == SHADER_ARB) {
414
415         /*  Create the hw ARB shader */
416         shader_addline(&buffer, "!!ARBvp1.0\n");
417         shader_addline(&buffer, "PARAM helper_const = { 2.0, -1.0, %d.0, 0.0 };\n", This->rel_offset);
418
419         /* Mesa supports only 95 constants */
420         if (GL_VEND(MESA) || GL_VEND(WINE))
421             This->baseShader.limits.constant_float = 
422                 min(95, This->baseShader.limits.constant_float);
423
424         shader_addline(&buffer, "TEMP TMP;\n");
425
426         /* Base Declarations */
427         shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
428
429         /* We need a constant to fixup the final position */
430         shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
431
432         if((GLINFO_LOCATION).set_texcoord_w) {
433             int i;
434             for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) {
435                 if(This->baseShader.reg_maps.texcoord_mask[i] != 0 &&
436                    This->baseShader.reg_maps.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) {
437                     shader_addline(&buffer, "MOV result.texcoord[%u].w, -helper_const.y;\n", i);
438                    }
439             }
440         }
441
442         /* Base Shader Body */
443         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
444
445         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
446         if (!reg_maps->fog)
447             shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
448
449         /* Write the final position.
450          *
451          * OpenGL coordinates specify the center of the pixel while d3d coords specify
452          * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
453          * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
454          * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
455          */
456         shader_addline(&buffer, "MUL TMP, posFixup, TMP_OUT.w;\n");
457         shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, TMP.z;\n");
458         shader_addline(&buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TMP.w;\n");
459
460         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
461          * and the glsl equivalent
462          */
463         shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, helper_const.x, -TMP_OUT.w;\n");
464
465         shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
466         
467         shader_addline(&buffer, "END\n"); 
468
469         /* TODO: change to resource.glObjectHandle or something like that */
470         GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
471
472         TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
473         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
474
475         TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
476         /* Create the program and check for errors */
477         GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
478             buffer.bsize, buffer.buffer));
479
480         if (glGetError() == GL_INVALID_OPERATION) {
481             GLint errPos;
482             glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
483             FIXME("HW VertexShader Error at position %d: %s\n",
484                   errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
485             This->baseShader.prgId = -1;
486         }
487     }
488
489 #if 1 /* if were using the data buffer of device then we don't need to free it */
490   HeapFree(GetProcessHeap(), 0, buffer.buffer);
491 #endif
492 }
493
494 /* *******************************************
495    IWineD3DVertexShader IUnknown parts follow
496    ******************************************* */
497 static HRESULT  WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
498     return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
499 }
500
501 static ULONG  WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
502     return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
503 }
504
505 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
506     return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
507 }
508
509 /* *******************************************
510    IWineD3DVertexShader IWineD3DVertexShader parts follow
511    ******************************************* */
512
513 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
514     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
515     
516     *parent = This->parent;
517     IUnknown_AddRef(*parent);
518     TRACE("(%p) : returning %p\n", This, *parent);
519     return WINED3D_OK;
520 }
521
522 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
523     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
524     IWineD3DDevice_AddRef(This->baseShader.device);
525     *pDevice = This->baseShader.device;
526     TRACE("(%p) returning %p\n", This, *pDevice);
527     return WINED3D_OK;
528 }
529
530 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
531     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
532     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
533
534     if (NULL == pData) {
535         *pSizeOfData = This->baseShader.functionLength;
536         return WINED3D_OK;
537     }
538     if (*pSizeOfData < This->baseShader.functionLength) {
539         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
540          * than the required size we should write the required size and
541          * return D3DERR_MOREDATA. That's not actually true. */
542         return WINED3DERR_INVALIDCALL;
543     }
544     if (NULL == This->baseShader.function) { /* no function defined */
545         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
546         (*(DWORD **) pData) = NULL;
547     } else {
548         if(This->baseShader.functionLength == 0){
549
550         }
551         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
552         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
553     }
554     return WINED3D_OK;
555 }
556
557 /* Note that for vertex shaders CompileShader isn't called until the
558  * shader is first used. The reason for this is that we need the vertex
559  * declaration the shader will be used with in order to determine if
560  * the data in a register is of type D3DCOLOR, and needs swizzling. */
561 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
562
563     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
564     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
565     HRESULT hr;
566     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
567
568     TRACE("(%p) : pFunction %p\n", iface, pFunction);
569
570     /* First pass: trace shader */
571     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
572     vshader_set_limits(This);
573
574     /* Initialize immediate constant lists */
575     list_init(&This->baseShader.constantsF);
576     list_init(&This->baseShader.constantsB);
577     list_init(&This->baseShader.constantsI);
578
579     /* Second pass: figure out registers used, semantics, etc.. */
580     This->min_rel_offset = GL_LIMITS(vshader_constantsF);
581     This->max_rel_offset = 0;
582     memset(reg_maps, 0, sizeof(shader_reg_maps));
583     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
584        This->semantics_in, This->semantics_out, pFunction, NULL);
585     if (hr != WINED3D_OK) return hr;
586
587     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
588
589     if(deviceImpl->vs_selected_mode == SHADER_ARB &&
590        (GLINFO_LOCATION).arb_vs_offset_limit      &&
591        This->min_rel_offset <= This->max_rel_offset) {
592
593         if(This->max_rel_offset - This->min_rel_offset > 127) {
594             FIXME("The difference between the minimum and maximum relative offset is > 127\n");
595             FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
596             FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
597         } else if(This->max_rel_offset - This->min_rel_offset > 63) {
598             This->rel_offset = This->min_rel_offset + 63;
599         } else if(This->max_rel_offset > 63) {
600             This->rel_offset = This->min_rel_offset;
601         } else {
602             This->rel_offset = 0;
603         }
604     }
605     This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
606
607     /* copy the function ... because it will certainly be released by application */
608     if (NULL != pFunction) {
609         void *function;
610
611         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
612         if (!function) return E_OUTOFMEMORY;
613         memcpy(function, pFunction, This->baseShader.functionLength);
614         This->baseShader.function = function;
615     } else {
616         This->baseShader.function = NULL;
617     }
618
619     return WINED3D_OK;
620 }
621
622 /* Preload semantics for d3d8 shaders */
623 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
624     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
625     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
626
627     int i;
628     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
629         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
630         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
631     }
632 }
633
634 /* Set local constants for d3d8 shaders */
635 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
636         UINT start_idx, const float *src_data, UINT count) {
637     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
638     UINT i, end_idx;
639
640     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
641
642     end_idx = start_idx + count;
643     if (end_idx > GL_LIMITS(vshader_constantsF)) {
644         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
645         end_idx = GL_LIMITS(vshader_constantsF);
646     }
647
648     for (i = start_idx; i < end_idx; ++i) {
649         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
650         if (!lconst) return E_OUTOFMEMORY;
651
652         lconst->idx = i;
653         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
654         list_add_head(&This->baseShader.constantsF, &lconst->entry);
655     }
656
657     return WINED3D_OK;
658 }
659
660 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
661     UINT i, j, k;
662     BOOL found;
663
664     DWORD usage_token;
665     DWORD usage;
666     DWORD usage_idx;
667
668     for(i = 0; i < vdecl->declarationWNumElements; i++) {
669         /* Ignore tesselated streams and the termination entry(position0, stream 255, unused) */
670         if(vdecl->pDeclarationWine[i].Stream >= MAX_STREAMS ||
671            vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_UNUSED) continue;
672
673         for(j = 0; j < MAX_ATTRIBS; j++) {
674             if(!This->baseShader.reg_maps.attributes[j]) continue;
675
676             usage_token = This->semantics_in[j].usage;
677             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
678             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
679
680             if(vdecl->pDeclarationWine[i].Usage != usage ||
681                vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
682                 continue;
683             }
684
685             found = FALSE;
686             for(k = 0; k < This->num_swizzled_attribs; k++) {
687                 if(This->swizzled_attribs[k].usage == usage &&
688                     This->swizzled_attribs[k].idx == usage_idx) {
689                     found = TRUE;
690                 }
691             }
692             if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
693                 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
694                       debug_d3ddeclusage(usage), usage_idx);
695                 return TRUE;
696             }
697             if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
698                 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
699                       debug_d3ddeclusage(usage), usage_idx);
700                 return TRUE;
701             }
702         }
703     }
704     return FALSE;
705 }
706
707 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
708     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
709     IWineD3DVertexDeclarationImpl *vdecl;
710     CONST DWORD *function = This->baseShader.function;
711     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
712
713     TRACE("(%p) : function %p\n", iface, function);
714
715     /* We're already compiled. */
716     if (This->baseShader.is_compiled) {
717         vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
718
719         if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
720            memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
721
722             /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
723              * we have to recompile, but we have to take a deeper look at see if the attribs that differ
724              * are declared in the decl and used in the shader
725              */
726             if(swizzled_attribs_differ(This, vdecl)) {
727                 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
728                 goto recompile;
729             }
730             WARN("Swizzled attribute validation required an expensive comparison\n");
731         }
732
733         return WINED3D_OK;
734
735         recompile:
736         if(This->recompile_count < 50) {
737             This->recompile_count++;
738         } else {
739             FIXME("Vertexshader %p recompiled more than 50 times\n", This);
740         }
741
742         deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
743     }
744
745     /* We don't need to compile */
746     if (!function) {
747         This->baseShader.is_compiled = TRUE;
748         return WINED3D_OK;
749     }
750
751     /* Generate the HW shader */
752     TRACE("(%p) : Generating hardware program\n", This);
753     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
754
755     This->baseShader.is_compiled = TRUE;
756
757     return WINED3D_OK;
758 }
759
760 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
761 {
762     /*** IUnknown methods ***/
763     IWineD3DVertexShaderImpl_QueryInterface,
764     IWineD3DVertexShaderImpl_AddRef,
765     IWineD3DVertexShaderImpl_Release,
766     /*** IWineD3DBase methods ***/
767     IWineD3DVertexShaderImpl_GetParent,
768     /*** IWineD3DBaseShader methods ***/
769     IWineD3DVertexShaderImpl_SetFunction,
770     IWineD3DVertexShaderImpl_CompileShader,
771     /*** IWineD3DVertexShader methods ***/
772     IWineD3DVertexShaderImpl_GetDevice,
773     IWineD3DVertexShaderImpl_GetFunction,
774     IWineD3DVertexShaderImpl_FakeSemantics,
775     IWIneD3DVertexShaderImpl_SetLocalConstantsF
776 };