wined3d: Multiply the half pixel correction with .w.
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24
25 #include "config.h"
26
27 #include <math.h>
28 #include <stdio.h>
29
30 #include "wined3d_private.h"
31
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
33
34 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
35
36 /* Shader debugging - Change the following line to enable debugging of software
37       vertex shaders                                                             */
38 #if 0 /* Must not be 1 in cvs version */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
41 #else
42 # define VSTRACE(A)
43 # define TRACE_VSVECTOR(name)
44 #endif
45
46 /**
47  * DirectX9 SDK download
48  *  http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
49  *
50  * Exploring D3DX
51  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
52  *
53  * Using Vertex Shaders
54  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
55  *
56  * Dx9 New
57  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
58  *
59  * Dx9 Shaders
60  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
64  *
65  * Dx9 D3DX
66  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
67  *
68  * FVF
69  *  http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
70  *
71  * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72  *  http://developer.nvidia.com/view.asp?IO=vstovp
73  *
74  * NVIDIA: Memory Management with VAR
75  *  http://developer.nvidia.com/view.asp?IO=var_memory_management
76  */
77
78 /* TODO: Vertex and pixel shaders are almost identical, the only exception being the way that some of the data is looked up or the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders.
79 Because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross compiled using a large body of shared code */
80
81 #define GLNAME_REQUIRE_GLSL  ((const char *)1)
82
83 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
84     /* This table is not order or position dependent. */
85
86     /* Arithmetic */
87     {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, vshader_hw_map2gl,   NULL, 0, 0},
88     {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, vshader_hw_map2gl,   shader_glsl_mov, 0, 0},
89     {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, vshader_hw_map2gl,   shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
90     {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
91     {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
92     {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, vshader_hw_map2gl,   shader_glsl_mad, 0, 0},
93     {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
94     {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
95     {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rsq, 0, 0},
96     {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
97     {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
98     {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
99     {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
100     {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
101     {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
102     {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
103     {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
104     {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
105     {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, vshader_hw_map2gl,   shader_glsl_expp, 0, 0},
106     {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
107     {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, vshader_hw_map2gl,   shader_glsl_lit, 0, 0},
108     {WINED3DSIO_DST,    "dst",  "DST", 1, 3, vshader_hw_map2gl,   shader_glsl_dst, 0, 0},
109     {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
110     {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
111     {WINED3DSIO_POW,    "pow",  "POW", 1, 3, vshader_hw_map2gl,   shader_glsl_pow, 0, 0},
112     {WINED3DSIO_CRS,    "crs",  "XPD", 1, 3, vshader_hw_map2gl,   shader_glsl_cross, 0, 0},
113     /* TODO: sng can possibly be performed a  s
114         RCP tmp, vec
115         MUL out, tmp, vec*/
116     {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
117     {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
118     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
119     {WINED3DSIO_SINCOS, "sincos",  "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
120     /* Matrix */
121     {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
122     {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
123     {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
124     {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
125     {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
126     /* Declare registers */
127     {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
128     /* Constant definitions */
129     {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
130     {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
131     {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
132     /* Flow control - requires GLSL or software shaders */
133     {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
134     {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
135     {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
136     {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
137     {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
138     {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
139     {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
140     {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
141     {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
142     {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
143     {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
144     {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
145     {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
146     {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
147     {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},
148
149     {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
150     {WINED3DSIO_TEXLDL, "texldl",   NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
151     {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
152 };
153
154 static void vshader_set_limits(
155       IWineD3DVertexShaderImpl *This) {
156
157       This->baseShader.limits.texcoord = 0;
158       This->baseShader.limits.attributes = 16;
159       This->baseShader.limits.packed_input = 0;
160
161       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
162       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
163
164       switch (This->baseShader.hex_version) {
165           case WINED3DVS_VERSION(1,0):
166           case WINED3DVS_VERSION(1,1):
167                    This->baseShader.limits.temporary = 12;
168                    This->baseShader.limits.constant_bool = 0;
169                    This->baseShader.limits.constant_int = 0;
170                    This->baseShader.limits.address = 1;
171                    This->baseShader.limits.packed_output = 0;
172                    This->baseShader.limits.sampler = 0;
173                    This->baseShader.limits.label = 0;
174                    break;
175       
176           case WINED3DVS_VERSION(2,0):
177           case WINED3DVS_VERSION(2,1):
178                    This->baseShader.limits.temporary = 12;
179                    This->baseShader.limits.constant_bool = 16;
180                    This->baseShader.limits.constant_int = 16;
181                    This->baseShader.limits.address = 1;
182                    This->baseShader.limits.packed_output = 0;
183                    This->baseShader.limits.sampler = 0;
184                    This->baseShader.limits.label = 16;
185                    break;
186
187           case WINED3DVS_VERSION(3,0):
188                    This->baseShader.limits.temporary = 32;
189                    This->baseShader.limits.constant_bool = 32;
190                    This->baseShader.limits.constant_int = 32;
191                    This->baseShader.limits.address = 1;
192                    This->baseShader.limits.packed_output = 12;
193                    This->baseShader.limits.sampler = 4;
194                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
195                    break;
196
197           default: This->baseShader.limits.temporary = 12;
198                    This->baseShader.limits.constant_bool = 16;
199                    This->baseShader.limits.constant_int = 16;
200                    This->baseShader.limits.address = 1;
201                    This->baseShader.limits.packed_output = 0;
202                    This->baseShader.limits.sampler = 0;
203                    This->baseShader.limits.label = 16;
204                    FIXME("Unrecognized vertex shader version %#x\n",
205                        This->baseShader.hex_version);
206       }
207 }
208
209 /* This is an internal function,
210  * used to create fake semantics for shaders
211  * that don't have them - d3d8 shaders where the declaration
212  * stores the register for each input
213  */
214 static void vshader_set_input(
215     IWineD3DVertexShaderImpl* This,
216     unsigned int regnum,
217     BYTE usage, BYTE usage_idx) {
218
219     /* Fake usage: set reserved bit, usage, usage_idx */
220     DWORD usage_token = (0x1 << 31) |
221         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
222
223     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
224     DWORD reg_token = (0x1 << 31) |
225         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
226
227     This->semantics_in[regnum].usage = usage_token;
228     This->semantics_in[regnum].reg = reg_token;
229 }
230
231 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
232     if (usage_idx1 != usage_idx2) return FALSE;
233     if (usage1 == usage2) return TRUE;
234     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
235     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
236
237     return FALSE;
238 }
239
240 BOOL vshader_get_input(
241     IWineD3DVertexShader* iface,
242     BYTE usage_req, BYTE usage_idx_req,
243     unsigned int* regnum) {
244
245     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
246     int i;
247
248     for (i = 0; i < MAX_ATTRIBS; i++) {
249         DWORD usage_token = This->semantics_in[i].usage;
250         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
251         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
252
253         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
254             *regnum = i;
255             return TRUE;
256         }
257     }
258     return FALSE;
259 }
260
261 BOOL vshader_input_is_color(
262     IWineD3DVertexShader* iface,
263     unsigned int regnum) {
264
265     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
266
267     DWORD usage_token = This->semantics_in[regnum].usage;
268     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
269     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
270
271     int i;
272
273     for(i = 0; i < This->num_swizzled_attribs; i++) {
274         if(This->swizzled_attribs[i].usage == usage &&
275            This->swizzled_attribs[i].idx == usage_idx) {
276             return TRUE;
277         }
278     }
279     return FALSE;
280 }
281
282 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
283     UINT num = 0, i, j;
284     UINT numoldswizzles = This->num_swizzled_attribs;
285     IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
286
287     DWORD usage_token, usage, usage_idx;
288     BOOL found;
289
290     attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
291
292     /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
293     memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
294
295     memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
296
297     for(i = 0; i < decl->num_swizzled_attribs; i++) {
298         for(j = 0; j < MAX_ATTRIBS; j++) {
299
300             if(!This->baseShader.reg_maps.attributes[j]) continue;
301
302             usage_token = This->semantics_in[j].usage;
303             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
304             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
305
306             if(decl->swizzled_attribs[i].usage == usage &&
307                decl->swizzled_attribs[i].idx == usage_idx) {
308                 This->swizzled_attribs[num].usage = usage;
309                 This->swizzled_attribs[num].idx = usage_idx;
310                 num++;
311             }
312         }
313     }
314
315     /* Add previously converted attributes back in if they are not defined in the current declaration */
316     for(i = 0; i < numoldswizzles; i++) {
317
318         found = FALSE;
319         for(j = 0; j < decl->declarationWNumElements; j++) {
320             if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
321                oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
322                 found = TRUE;
323             }
324         }
325         if(found) {
326             /* This previously converted attribute is declared in the current declaration. Either it is
327              * already in the new array, or it should not be there. Skip it
328              */
329             continue;
330         }
331         /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
332          * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
333          * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
334          * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
335          * stays unswizzled as well because it isn't found in the oldswizzles array
336          */
337         for(j = 0; j < num; j++) {
338             if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
339                oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
340                oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
341                 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
342                          sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
343                 break;
344             }
345         }
346         This->swizzled_attribs[j].usage = oldswizzles[i].usage;
347         This->swizzled_attribs[j].idx = oldswizzles[i].idx;
348         num++;
349     }
350
351     TRACE("New swizzled attributes array\n");
352     for(i = 0; i < num; i++) {
353         TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
354               This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
355     }
356     This->num_swizzled_attribs = num;
357 }
358 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
359     or GLSL and send it to the card */
360 static VOID IWineD3DVertexShaderImpl_GenerateShader(
361     IWineD3DVertexShader *iface,
362     shader_reg_maps* reg_maps,
363     CONST DWORD *pFunction) {
364
365     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
366     IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
367     SHADER_BUFFER buffer;
368
369     find_swizzled_attribs(decl, This);
370
371 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
372         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
373     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
374         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
375         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
376         This->fixupVertexBufferSize = PGMSIZE;
377         This->fixupVertexBuffer[0] = 0;
378     }
379     buffer.buffer = This->device->fixupVertexBuffer;
380 #else
381     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
382 #endif
383     buffer.bsize = 0;
384     buffer.lineNo = 0;
385     buffer.newline = TRUE;
386
387     if (This->baseShader.shader_mode == SHADER_GLSL) {
388
389         /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
390         GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
391
392         /* Base Declarations */
393         shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
394
395         /* Base Shader Body */
396         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
397
398         /* Unpack 3.0 outputs */
399         if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0)) {
400             shader_addline(&buffer, "order_ps_input(OUT);\n");
401         } else {
402             shader_addline(&buffer, "order_ps_input();\n");
403         }
404
405         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
406         if (!reg_maps->fog)
407             shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
408
409         /* Write the final position.
410          *
411          * OpenGL coordinates specify the center of the pixel while d3d coords specify
412          * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
413          * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
414          * contains 1.0 to allow a mad.
415          */
416         shader_addline(&buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n");
417         shader_addline(&buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n");
418
419         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
420          *
421          * Basically we want(in homogenous coordinates) z = z * 2 - 1. However, shaders are run
422          * before the homogenous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
423          * which is the same as z = z / 2 - w.
424          */
425         shader_addline(&buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
426
427         shader_addline(&buffer, "}\n");
428
429         TRACE("Compiling shader object %u\n", shader_obj);
430         GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
431         GL_EXTCALL(glCompileShaderARB(shader_obj));
432         print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
433
434         /* Store the shader object */
435         This->baseShader.prgId = shader_obj;
436
437     } else if (This->baseShader.shader_mode == SHADER_ARB) {
438
439         /*  Create the hw ARB shader */
440         shader_addline(&buffer, "!!ARBvp1.0\n");
441         shader_addline(&buffer, "PARAM helper_const = { 2.0, -1.0, %d.0, 0.0 };\n", This->rel_offset);
442
443         /* Mesa supports only 95 constants */
444         if (GL_VEND(MESA) || GL_VEND(WINE))
445             This->baseShader.limits.constant_float = 
446                 min(95, This->baseShader.limits.constant_float);
447
448         shader_addline(&buffer, "TEMP TMP;\n");
449
450         /* Base Declarations */
451         shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
452
453         /* We need a constant to fixup the final position */
454         shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
455
456         if((GLINFO_LOCATION).set_texcoord_w) {
457             int i;
458             for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) {
459                 if(This->baseShader.reg_maps.texcoord_mask[i] != 0 &&
460                    This->baseShader.reg_maps.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) {
461                     shader_addline(&buffer, "MOV result.texcoord[%u].w, -helper_const.y;\n", i);
462                    }
463             }
464         }
465
466         /* Base Shader Body */
467         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
468
469         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
470         if (!reg_maps->fog)
471             shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
472
473         /* Write the final position.
474          *
475          * OpenGL coordinates specify the center of the pixel while d3d coords specify
476          * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
477          * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
478          * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
479          */
480         shader_addline(&buffer, "MUL TMP, posFixup, TMP_OUT.w;\n");
481         shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, TMP.z;\n");
482         shader_addline(&buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TMP.w;\n");
483
484         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
485          * and the glsl equivalent
486          */
487         shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, helper_const.x, -TMP_OUT.w;\n");
488
489         shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
490         
491         shader_addline(&buffer, "END\n"); 
492
493         /* TODO: change to resource.glObjectHandle or something like that */
494         GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
495
496         TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
497         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
498
499         TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
500         /* Create the program and check for errors */
501         GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
502             buffer.bsize, buffer.buffer));
503
504         if (glGetError() == GL_INVALID_OPERATION) {
505             GLint errPos;
506             glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
507             FIXME("HW VertexShader Error at position %d: %s\n",
508                   errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
509             This->baseShader.prgId = -1;
510         }
511     }
512
513 #if 1 /* if were using the data buffer of device then we don't need to free it */
514   HeapFree(GetProcessHeap(), 0, buffer.buffer);
515 #endif
516 }
517
518 /* *******************************************
519    IWineD3DVertexShader IUnknown parts follow
520    ******************************************* */
521 static HRESULT  WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
522     return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
523 }
524
525 static ULONG  WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
526     return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
527 }
528
529 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
530     return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
531 }
532
533 /* *******************************************
534    IWineD3DVertexShader IWineD3DVertexShader parts follow
535    ******************************************* */
536
537 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
538     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
539     
540     *parent = This->parent;
541     IUnknown_AddRef(*parent);
542     TRACE("(%p) : returning %p\n", This, *parent);
543     return WINED3D_OK;
544 }
545
546 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
547     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
548     IWineD3DDevice_AddRef(This->baseShader.device);
549     *pDevice = This->baseShader.device;
550     TRACE("(%p) returning %p\n", This, *pDevice);
551     return WINED3D_OK;
552 }
553
554 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
555     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
556     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
557
558     if (NULL == pData) {
559         *pSizeOfData = This->baseShader.functionLength;
560         return WINED3D_OK;
561     }
562     if (*pSizeOfData < This->baseShader.functionLength) {
563         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
564          * than the required size we should write the required size and
565          * return D3DERR_MOREDATA. That's not actually true. */
566         return WINED3DERR_INVALIDCALL;
567     }
568     if (NULL == This->baseShader.function) { /* no function defined */
569         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
570         (*(DWORD **) pData) = NULL;
571     } else {
572         if(This->baseShader.functionLength == 0){
573
574         }
575         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
576         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
577     }
578     return WINED3D_OK;
579 }
580
581 /* Note that for vertex shaders CompileShader isn't called until the
582  * shader is first used. The reason for this is that we need the vertex
583  * declaration the shader will be used with in order to determine if
584  * the data in a register is of type D3DCOLOR, and needs swizzling. */
585 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
586
587     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
588     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
589     HRESULT hr;
590     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
591
592     TRACE("(%p) : pFunction %p\n", iface, pFunction);
593
594     /* First pass: trace shader */
595     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
596     vshader_set_limits(This);
597
598     /* Initialize immediate constant lists */
599     list_init(&This->baseShader.constantsF);
600     list_init(&This->baseShader.constantsB);
601     list_init(&This->baseShader.constantsI);
602
603     /* Second pass: figure out registers used, semantics, etc.. */
604     This->min_rel_offset = GL_LIMITS(vshader_constantsF);
605     This->max_rel_offset = 0;
606     memset(reg_maps, 0, sizeof(shader_reg_maps));
607     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
608        This->semantics_in, This->semantics_out, pFunction, NULL);
609     if (hr != WINED3D_OK) return hr;
610
611     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
612
613     if(deviceImpl->vs_selected_mode == SHADER_ARB &&
614        (GLINFO_LOCATION).arb_vs_offset_limit      &&
615        This->min_rel_offset <= This->max_rel_offset) {
616
617         if(This->max_rel_offset - This->min_rel_offset > 127) {
618             FIXME("The difference between the minimum and maximum relative offset is > 127\n");
619             FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
620             FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
621         } else if(This->max_rel_offset - This->min_rel_offset > 63) {
622             This->rel_offset = This->min_rel_offset + 63;
623         } else if(This->max_rel_offset > 63) {
624             This->rel_offset = This->min_rel_offset;
625         } else {
626             This->rel_offset = 0;
627         }
628     }
629     This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
630
631     /* copy the function ... because it will certainly be released by application */
632     if (NULL != pFunction) {
633         void *function;
634
635         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
636         if (!function) return E_OUTOFMEMORY;
637         memcpy(function, pFunction, This->baseShader.functionLength);
638         This->baseShader.function = function;
639     } else {
640         This->baseShader.function = NULL;
641     }
642
643     return WINED3D_OK;
644 }
645
646 /* Preload semantics for d3d8 shaders */
647 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
648     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
649     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
650
651     int i;
652     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
653         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
654         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
655     }
656 }
657
658 /* Set local constants for d3d8 shaders */
659 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
660         UINT start_idx, const float *src_data, UINT count) {
661     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
662     UINT i, end_idx;
663
664     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
665
666     end_idx = start_idx + count;
667     if (end_idx > GL_LIMITS(vshader_constantsF)) {
668         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
669         end_idx = GL_LIMITS(vshader_constantsF);
670     }
671
672     for (i = start_idx; i < end_idx; ++i) {
673         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
674         if (!lconst) return E_OUTOFMEMORY;
675
676         lconst->idx = i;
677         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
678         list_add_head(&This->baseShader.constantsF, &lconst->entry);
679     }
680
681     return WINED3D_OK;
682 }
683
684 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
685     UINT i, j, k;
686     BOOL found;
687
688     DWORD usage_token;
689     DWORD usage;
690     DWORD usage_idx;
691
692     for(i = 0; i < vdecl->declarationWNumElements; i++) {
693         /* Ignore tesselated streams and the termination entry(position0, stream 255, unused) */
694         if(vdecl->pDeclarationWine[i].Stream >= MAX_STREAMS ||
695            vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_UNUSED) continue;
696
697         for(j = 0; j < MAX_ATTRIBS; j++) {
698             if(!This->baseShader.reg_maps.attributes[j]) continue;
699
700             usage_token = This->semantics_in[j].usage;
701             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
702             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
703
704             if(vdecl->pDeclarationWine[i].Usage != usage ||
705                vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
706                 continue;
707             }
708
709             found = FALSE;
710             for(k = 0; k < This->num_swizzled_attribs; k++) {
711                 if(This->swizzled_attribs[k].usage == usage &&
712                     This->swizzled_attribs[k].idx == usage_idx) {
713                     found = TRUE;
714                 }
715             }
716             if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
717                 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
718                       debug_d3ddeclusage(usage), usage_idx);
719                 return TRUE;
720             }
721             if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
722                 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
723                       debug_d3ddeclusage(usage), usage_idx);
724                 return TRUE;
725             }
726         }
727     }
728     return FALSE;
729 }
730
731 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
732     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
733     IWineD3DVertexDeclarationImpl *vdecl;
734     CONST DWORD *function = This->baseShader.function;
735     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
736
737     TRACE("(%p) : function %p\n", iface, function);
738
739     /* We're already compiled. */
740     if (This->baseShader.is_compiled) {
741         vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
742
743         if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
744            memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
745
746             /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
747              * we have to recompile, but we have to take a deeper look at see if the attribs that differ
748              * are declared in the decl and used in the shader
749              */
750             if(swizzled_attribs_differ(This, vdecl)) {
751                 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
752                 goto recompile;
753             }
754             WARN("Swizzled attribute validation required an expensive comparison\n");
755         }
756
757         return WINED3D_OK;
758
759         recompile:
760         if(This->recompile_count < 50) {
761             This->recompile_count++;
762         } else {
763             FIXME("Vertexshader %p recompiled more than 50 times\n", This);
764         }
765
766         deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
767     }
768
769     /* We don't need to compile */
770     if (!function) {
771         This->baseShader.is_compiled = TRUE;
772         return WINED3D_OK;
773     }
774
775     /* Generate the HW shader */
776     TRACE("(%p) : Generating hardware program\n", This);
777     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
778
779     This->baseShader.is_compiled = TRUE;
780
781     return WINED3D_OK;
782 }
783
784 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
785 {
786     /*** IUnknown methods ***/
787     IWineD3DVertexShaderImpl_QueryInterface,
788     IWineD3DVertexShaderImpl_AddRef,
789     IWineD3DVertexShaderImpl_Release,
790     /*** IWineD3DBase methods ***/
791     IWineD3DVertexShaderImpl_GetParent,
792     /*** IWineD3DBaseShader methods ***/
793     IWineD3DVertexShaderImpl_SetFunction,
794     IWineD3DVertexShaderImpl_CompileShader,
795     /*** IWineD3DVertexShader methods ***/
796     IWineD3DVertexShaderImpl_GetDevice,
797     IWineD3DVertexShaderImpl_GetFunction,
798     IWineD3DVertexShaderImpl_FakeSemantics,
799     IWIneD3DVertexShaderImpl_SetLocalConstantsF
800 };