mshtml: Added IHTMLStyleSheet::get_rules implementation.
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24
25 #include "config.h"
26
27 #include <math.h>
28 #include <stdio.h>
29
30 #include "wined3d_private.h"
31
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
33
34 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
35
36 /* Shader debugging - change the "#if 0" below to "#if 1" to enable debugging of
37    software vertex shaders.
      When enabled, VSTRACE(A) expands to "TRACE A" (so A must be a parenthesised
      argument list) and TRACE_VSVECTOR(name) traces the x, y, z and w members of
      `name`. When disabled, both macros expand to nothing. */
38 #if 0 /* Must not be 1 in the CVS version */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
41 #else
42 # define VSTRACE(A)
43 # define TRACE_VSVECTOR(name)
44 #endif
45
46 /**
47  * DirectX9 SDK download
48  *  http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
49  *
50  * Exploring D3DX
51  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
52  *
53  * Using Vertex Shaders
54  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
55  *
56  * Dx9 New
57  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
58  *
59  * Dx9 Shaders
60  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
64  *
65  * Dx9 D3DX
66  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
67  *
68  * FVF
69  *  http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
70  *
71  * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72  *  http://developer.nvidia.com/view.asp?IO=vstovp
73  *
74  * NVIDIA: Memory Management with VAR
75  *  http://developer.nvidia.com/view.asp?IO=var_memory_management
76  */
77
78 /* TODO: Vertex and pixel shaders are almost identical; the only differences are the way some of the data is looked up and the availability of some of the data, i.e. some instructions are only valid for pixel shaders and some only for vertex shaders.
79 Because of this, the bulk of the software pipeline can be shared between pixel and vertex shaders, and it would not be surprising if the programs could be cross-compiled using a large body of shared code. */
80
/* Sentinel value (an invalid, non-NULL pointer) placed in the third column of the
 * opcode table below for defb, defi, breakp and setp.
 * NOTE(review): the name suggests "this opcode can only be handled by the GLSL
 * backend" - confirm against the code that reads the table. */
81 #define GLNAME_REQUIRE_GLSL  ((const char *)1)
82
/* Table of the vertex shader instructions known to this file.
 *
 * Each row holds, in order: the WINED3DSIO_* opcode, its lower-case mnemonic, a
 * second name string (NULL, an upper-case mnemonic, "undefined" or
 * GLNAME_REQUIRE_GLSL), two small integers, two handler function pointers
 * (vshader_hw_* / shader_hw_* and shader_glsl_*, either may be NULL) and two
 * shader version bounds (0, a WINED3DVS_VERSION() value, or -1).
 * NOTE(review): the exact meaning of each column is defined by SHADER_OPCODE,
 * which is declared outside this file - confirm there.
 *
 * The table is terminated by an all-zero / all-NULL row. */
83 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
84     /* This table is not order or position dependent. */
85
86     /* Arithmetic */
87     {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, vshader_hw_map2gl,   NULL, 0, 0},
88     {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, vshader_hw_map2gl,   shader_glsl_mov, 0, 0},
89     {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, vshader_hw_map2gl,   shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
90     {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
91     {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
92     {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, vshader_hw_map2gl,   shader_glsl_mad, 0, 0},
93     {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
94     {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
95     {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rsq, 0, 0},
96     {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
97     {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
98     {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
99     {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
100     {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
101     {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
102     {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
103     {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
104     {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
105     {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, vshader_hw_map2gl,   shader_glsl_expp, 0, 0},
106     {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
107     {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, vshader_hw_map2gl,   shader_glsl_lit, 0, 0},
108     {WINED3DSIO_DST,    "dst",  "DST", 1, 3, vshader_hw_map2gl,   shader_glsl_dst, 0, 0},
109     {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
110     {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
111     {WINED3DSIO_POW,    "pow",  "POW", 1, 3, vshader_hw_map2gl,   shader_glsl_pow, 0, 0},
112     {WINED3DSIO_CRS,    "crs",  "XPD", 1, 3, vshader_hw_map2gl,   shader_glsl_cross, 0, 0},
113     /* TODO: sgn can possibly be performed as
114         RCP tmp, vec
115         MUL out, tmp, vec */
116     {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
117     {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
118     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
119     {WINED3DSIO_SINCOS, "sincos",  "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
120     /* Matrix */
121     {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
122     {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
123     {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
124     {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
125     {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
126     /* Declare registers */
127     {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
128     /* Constant definitions */
129     {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
130     {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
131     {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
132     /* Flow control - requires GLSL or software shaders */
133     {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
134     {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
135     {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
136     {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
137     {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
138     {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
139     {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
140     {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
141     {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
142     {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
143     {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
144     {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
145     {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
146     {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
147     {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},
148
149     {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
150     {WINED3DSIO_TEXLDL, "texldl",   NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
    /* End-of-table marker */
151     {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
152 };
153
154 static void vshader_set_limits(
155       IWineD3DVertexShaderImpl *This) {
156
157       This->baseShader.limits.texcoord = 0;
158       This->baseShader.limits.attributes = 16;
159       This->baseShader.limits.packed_input = 0;
160
161       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
162       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
163
164       switch (This->baseShader.hex_version) {
165           case WINED3DVS_VERSION(1,0):
166           case WINED3DVS_VERSION(1,1):
167                    This->baseShader.limits.temporary = 12;
168                    This->baseShader.limits.constant_bool = 0;
169                    This->baseShader.limits.constant_int = 0;
170                    This->baseShader.limits.address = 1;
171                    This->baseShader.limits.packed_output = 0;
172                    This->baseShader.limits.sampler = 0;
173                    This->baseShader.limits.label = 0;
174                    break;
175       
176           case WINED3DVS_VERSION(2,0):
177           case WINED3DVS_VERSION(2,1):
178                    This->baseShader.limits.temporary = 12;
179                    This->baseShader.limits.constant_bool = 16;
180                    This->baseShader.limits.constant_int = 16;
181                    This->baseShader.limits.address = 1;
182                    This->baseShader.limits.packed_output = 0;
183                    This->baseShader.limits.sampler = 0;
184                    This->baseShader.limits.label = 16;
185                    break;
186
187           case WINED3DVS_VERSION(3,0):
188                    This->baseShader.limits.temporary = 32;
189                    This->baseShader.limits.constant_bool = 32;
190                    This->baseShader.limits.constant_int = 32;
191                    This->baseShader.limits.address = 1;
192                    This->baseShader.limits.packed_output = 12;
193                    This->baseShader.limits.sampler = 4;
194                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
195                    break;
196
197           default: This->baseShader.limits.temporary = 12;
198                    This->baseShader.limits.constant_bool = 16;
199                    This->baseShader.limits.constant_int = 16;
200                    This->baseShader.limits.address = 1;
201                    This->baseShader.limits.packed_output = 0;
202                    This->baseShader.limits.sampler = 0;
203                    This->baseShader.limits.label = 16;
204                    FIXME("Unrecognized vertex shader version %#x\n",
205                        This->baseShader.hex_version);
206       }
207 }
208
209 /* This is an internal function,
210  * used to create fake semantics for shaders
211  * that don't have them - d3d8 shaders where the declaration
212  * stores the register for each input
213  */
214 static void vshader_set_input(
215     IWineD3DVertexShaderImpl* This,
216     unsigned int regnum,
217     BYTE usage, BYTE usage_idx) {
218
219     /* Fake usage: set reserved bit, usage, usage_idx */
220     DWORD usage_token = (0x1 << 31) |
221         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
222
223     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
224     DWORD reg_token = (0x1 << 31) |
225         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
226
227     This->semantics_in[regnum].usage = usage_token;
228     This->semantics_in[regnum].reg = reg_token;
229 }
230
231 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
232     if (usage_idx1 != usage_idx2) return FALSE;
233     if (usage1 == usage2) return TRUE;
234     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
235     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
236
237     return FALSE;
238 }
239
240 BOOL vshader_get_input(
241     IWineD3DVertexShader* iface,
242     BYTE usage_req, BYTE usage_idx_req,
243     unsigned int* regnum) {
244
245     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
246     int i;
247
248     for (i = 0; i < MAX_ATTRIBS; i++) {
249         DWORD usage_token = This->semantics_in[i].usage;
250         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
251         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
252
253         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
254             *regnum = i;
255             return TRUE;
256         }
257     }
258     return FALSE;
259 }
260
261 BOOL vshader_input_is_color(
262     IWineD3DVertexShader* iface,
263     unsigned int regnum) {
264
265     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
266
267     DWORD usage_token = This->semantics_in[regnum].usage;
268     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
269     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
270
271     int i;
272
273     for(i = 0; i < This->num_swizzled_attribs; i++) {
274         if(This->swizzled_attribs[i].usage == usage &&
275            This->swizzled_attribs[i].idx == usage_idx) {
276             return TRUE;
277         }
278     }
279     return FALSE;
280 }
281
/* Rebuild This->swizzled_attribs[] / This->num_swizzled_attribs for a new vertex
 * declaration.
 *
 * declaration: the vertex declaration to match against (used through its
 *              IWineD3DVertexDeclarationImpl view).
 * This:        the vertex shader whose swizzle list is rewritten in place.
 *
 * Step 1: every attribute in the declaration's swizzled_attribs[] that matches
 *         the usage / usage index of an input register marked in
 *         reg_maps.attributes[] is appended to the new list.
 * Step 2: entries of the previous list whose usage / index does not appear in
 *         the declaration's element array at all are inserted back into it.
 *
 * NOTE(review): nothing here limits `num` to the capacity of
 * This->swizzled_attribs[]; presumably the inputs guarantee that - confirm.
 */
282 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
283     UINT num = 0, i, j;
284     UINT numoldswizzles = This->num_swizzled_attribs;
285     IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
286
287     DWORD usage_token, usage, usage_idx;
288     BOOL found;
289
    /* Scratch copy with the same element count as This->swizzled_attribs */
290     attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
291
292     /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
293     memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
294
    /* Clear the first MAX_ATTRIBS entries before the list is rebuilt */
295     memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
296
    /* Step 1: pick up the declaration's swizzled attributes that this shader reads */
297     for(i = 0; i < decl->num_swizzled_attribs; i++) {
298         for(j = 0; j < MAX_ATTRIBS; j++) {
299
            /* Skip input registers not marked in reg_maps.attributes */
300             if(!This->baseShader.reg_maps.attributes[j]) continue;
301
302             usage_token = This->semantics_in[j].usage;
303             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
304             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
305
306             if(decl->swizzled_attribs[i].usage == usage &&
307                decl->swizzled_attribs[i].idx == usage_idx) {
308                 This->swizzled_attribs[num].usage = usage;
309                 This->swizzled_attribs[num].idx = usage_idx;
310                 num++;
311             }
312         }
313     }
314
315     /* Add previously converted attributes back in if they are not defined in the current declaration */
316     for(i = 0; i < numoldswizzles; i++) {
317
318         found = FALSE;
        /* Scan all declaration elements; the loop does not stop at the first hit */
319         for(j = 0; j < decl->declarationWNumElements; j++) {
320             if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
321                oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
322                 found = TRUE;
323             }
324         }
325         if(found) {
326             /* This previously converted attribute is declared in the current declaration. Either it is
327              * already in the new array, or it should not be there. Skip it
328              */
329             continue;
330         }
331         /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
332          * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
333          * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
334          * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
335          * stays unswizzled as well because it isn't found in the oldswizzles array
336          */
        /* Find the first entry that compares lower than the old swizzle by (usage, idx)
         * and shift it and everything after it up by one slot. If no such entry exists,
         * j ends up equal to num and the old swizzle is appended. */
337         for(j = 0; j < num; j++) {
338             if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
339                oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
340                oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
341                 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
342                          sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
343                 break;
344             }
345         }
346         This->swizzled_attribs[j].usage = oldswizzles[i].usage;
347         This->swizzled_attribs[j].idx = oldswizzles[i].idx;
348         num++;
349     }
350
351     TRACE("New swizzled attributes array\n");
352     for(i = 0; i < num; i++) {
353         TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
354               This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
355     }
356     This->num_swizzled_attribs = num;
357 }
358 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
359     or GLSL and send it to the card */
360 static VOID IWineD3DVertexShaderImpl_GenerateShader(
361     IWineD3DVertexShader *iface,
362     shader_reg_maps* reg_maps,
363     CONST DWORD *pFunction) {
364
365     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
366     IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
367     SHADER_BUFFER buffer;
368
369     find_swizzled_attribs(decl, This);
370
371 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
372         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
373     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
374         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
375         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
376         This->fixupVertexBufferSize = PGMSIZE;
377         This->fixupVertexBuffer[0] = 0;
378     }
379     buffer.buffer = This->device->fixupVertexBuffer;
380 #else
381     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
382 #endif
383     buffer.bsize = 0;
384     buffer.lineNo = 0;
385     buffer.newline = TRUE;
386
387     if (This->baseShader.shader_mode == SHADER_GLSL) {
388
389         /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
390         GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
391
392         /* Base Declarations */
393         shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
394
395         /* Base Shader Body */
396         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
397
398         /* Unpack 3.0 outputs */
399         if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0)) {
400             shader_addline(&buffer, "order_ps_input(OUT);\n");
401         } else {
402             shader_addline(&buffer, "order_ps_input();\n");
403         }
404
405         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
406         if (!reg_maps->fog)
407             shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
408
409         /* Write the final position.
410          *
411          * OpenGL coordinates specify the center of the pixel while d3d coords specify
412          * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
413          * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
414          * contains 1.0 to allow a mad.
415          */
416         shader_addline(&buffer, "gl_Position.xy = gl_Position.xy * posFixup.xy + posFixup.zw;\n");
417
418         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
419          *
420          * Basically we want(in homogenous coordinates) z = z * 2 - 1. However, shaders are run
421          * before the homogenous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
422          * which is the same as z = z / 2 - w.
423          */
424         shader_addline(&buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
425
426         shader_addline(&buffer, "}\n");
427
428         TRACE("Compiling shader object %u\n", shader_obj);
429         GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
430         GL_EXTCALL(glCompileShaderARB(shader_obj));
431         print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
432
433         /* Store the shader object */
434         This->baseShader.prgId = shader_obj;
435
436     } else if (This->baseShader.shader_mode == SHADER_ARB) {
437
438         /*  Create the hw ARB shader */
439         shader_addline(&buffer, "!!ARBvp1.0\n");
440         shader_addline(&buffer, "PARAM helper_const = { 2.0, -1.0, %d.0, 0.0 };\n", This->rel_offset);
441
442         /* Mesa supports only 95 constants */
443         if (GL_VEND(MESA) || GL_VEND(WINE))
444             This->baseShader.limits.constant_float = 
445                 min(95, This->baseShader.limits.constant_float);
446
447         /* Some instructions need a temporary register. Add it if needed, but only if it is really needed */
448         if(reg_maps->usesnrm || This->rel_offset) {
449             shader_addline(&buffer, "TEMP TMP;\n");
450         }
451
452         /* Base Declarations */
453         shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
454
455         /* We need a constant to fixup the final position */
456         shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
457
458         /* Base Shader Body */
459         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
460
461         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
462         if (!reg_maps->fog)
463             shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
464
465         /* Write the final position.
466          *
467          * OpenGL coordinates specify the center of the pixel while d3d coords specify
468          * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
469          * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
470          * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
471          */
472         shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, posFixup.z;\n");
473         shader_addline(&buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, posFixup.w;\n");
474
475         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
476          * and the glsl equivalent
477          */
478         shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, helper_const.x, -TMP_OUT.w;\n");
479
480         shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
481         
482         shader_addline(&buffer, "END\n"); 
483
484         /* TODO: change to resource.glObjectHandle or something like that */
485         GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
486
487         TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
488         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
489
490         TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
491         /* Create the program and check for errors */
492         GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
493             buffer.bsize, buffer.buffer));
494
495         if (glGetError() == GL_INVALID_OPERATION) {
496             GLint errPos;
497             glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
498             FIXME("HW VertexShader Error at position %d: %s\n",
499                   errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
500             This->baseShader.prgId = -1;
501         }
502     }
503
504 #if 1 /* if were using the data buffer of device then we don't need to free it */
505   HeapFree(GetProcessHeap(), 0, buffer.buffer);
506 #endif
507 }
508
509 /* *******************************************
510    IWineD3DVertexShader IUnknown parts follow
511    ******************************************* */
512 static HRESULT  WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
513     return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
514 }
515
516 static ULONG  WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
517     return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
518 }
519
520 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
521     return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
522 }
523
524 /* *******************************************
525    IWineD3DVertexShader IWineD3DVertexShader parts follow
526    ******************************************* */
527
528 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
529     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
530     
531     *parent = This->parent;
532     IUnknown_AddRef(*parent);
533     TRACE("(%p) : returning %p\n", This, *parent);
534     return WINED3D_OK;
535 }
536
537 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
538     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
539     IWineD3DDevice_AddRef(This->baseShader.device);
540     *pDevice = This->baseShader.device;
541     TRACE("(%p) returning %p\n", This, *pDevice);
542     return WINED3D_OK;
543 }
544
545 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
546     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
547     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
548
549     if (NULL == pData) {
550         *pSizeOfData = This->baseShader.functionLength;
551         return WINED3D_OK;
552     }
553     if (*pSizeOfData < This->baseShader.functionLength) {
554         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
555          * than the required size we should write the required size and
556          * return D3DERR_MOREDATA. That's not actually true. */
557         return WINED3DERR_INVALIDCALL;
558     }
559     if (NULL == This->baseShader.function) { /* no function defined */
560         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
561         (*(DWORD **) pData) = NULL;
562     } else {
563         if(This->baseShader.functionLength == 0){
564
565         }
566         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
567         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
568     }
569     return WINED3D_OK;
570 }
571
572 /* Note that for vertex shaders CompileShader isn't called until the
573  * shader is first used. The reason for this is that we need the vertex
574  * declaration the shader will be used with in order to determine if
575  * the data in a register is of type D3DCOLOR, and needs swizzling. */
576 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
577
578     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
579     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
580     HRESULT hr;
581     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
582
583     TRACE("(%p) : pFunction %p\n", iface, pFunction);
584
585     /* First pass: trace shader */
586     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
587     vshader_set_limits(This);
588
589     /* Initialize immediate constant lists */
590     list_init(&This->baseShader.constantsF);
591     list_init(&This->baseShader.constantsB);
592     list_init(&This->baseShader.constantsI);
593
594     /* Second pass: figure out registers used, semantics, etc.. */
595     This->min_rel_offset = GL_LIMITS(vshader_constantsF);
596     This->max_rel_offset = 0;
597     memset(reg_maps, 0, sizeof(shader_reg_maps));
598     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
599        This->semantics_in, This->semantics_out, pFunction, NULL);
600     if (hr != WINED3D_OK) return hr;
601
602     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
603
604     if(deviceImpl->vs_selected_mode == SHADER_ARB &&
605        (GLINFO_LOCATION).arb_vs_offset_limit      &&
606        This->min_rel_offset <= This->max_rel_offset) {
607
608         if(This->max_rel_offset - This->min_rel_offset > 127) {
609             FIXME("The difference between the minimum and maximum relative offset is > 127\n");
610             FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
611             FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
612         } else if(This->max_rel_offset - This->min_rel_offset > 63) {
613             This->rel_offset = This->min_rel_offset + 63;
614         } else if(This->max_rel_offset > 63) {
615             This->rel_offset = This->min_rel_offset;
616         } else {
617             This->rel_offset = 0;
618         }
619     }
620     This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
621
622     /* copy the function ... because it will certainly be released by application */
623     if (NULL != pFunction) {
624         void *function;
625
626         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
627         if (!function) return E_OUTOFMEMORY;
628         memcpy(function, pFunction, This->baseShader.functionLength);
629         This->baseShader.function = function;
630     } else {
631         This->baseShader.function = NULL;
632     }
633
634     return WINED3D_OK;
635 }
636
637 /* Preload semantics for d3d8 shaders */
638 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
639     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
640     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
641
642     int i;
643     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
644         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
645         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
646     }
647 }
648
649 /* Set local constants for d3d8 shaders */
650 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
651         UINT start_idx, const float *src_data, UINT count) {
652     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
653     UINT i, end_idx;
654
655     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
656
657     end_idx = start_idx + count;
658     if (end_idx > GL_LIMITS(vshader_constantsF)) {
659         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
660         end_idx = GL_LIMITS(vshader_constantsF);
661     }
662
663     for (i = start_idx; i < end_idx; ++i) {
664         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
665         if (!lconst) return E_OUTOFMEMORY;
666
667         lconst->idx = i;
668         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
669         list_add_head(&This->baseShader.constantsF, &lconst->entry);
670     }
671
672     return WINED3D_OK;
673 }
674
675 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
676     UINT i, j, k;
677     BOOL found;
678
679     DWORD usage_token;
680     DWORD usage;
681     DWORD usage_idx;
682
683     for(i = 0; i < vdecl->declarationWNumElements; i++) {
684         for(j = 0; j < MAX_ATTRIBS; j++) {
685             if(!This->baseShader.reg_maps.attributes[j]) continue;
686
687             usage_token = This->semantics_in[j].usage;
688             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
689             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
690
691             if(vdecl->pDeclarationWine[i].Usage != usage ||
692                vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
693                 continue;
694             }
695
696             found = FALSE;
697             for(k = 0; k < This->num_swizzled_attribs; k++) {
698                 if(This->swizzled_attribs[k].usage == usage &&
699                     This->swizzled_attribs[k].idx == usage_idx) {
700                     found = TRUE;
701                 }
702             }
703             if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
704                 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
705                       debug_d3ddeclusage(usage), usage_idx);
706                 return TRUE;
707             }
708             if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
709                 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
710                       debug_d3ddeclusage(usage), usage_idx);
711                 return TRUE;
712             }
713         }
714     }
715     return FALSE;
716 }
717
718 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
719     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
720     IWineD3DVertexDeclarationImpl *vdecl;
721     CONST DWORD *function = This->baseShader.function;
722     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
723
724     TRACE("(%p) : function %p\n", iface, function);
725
726     /* We're already compiled. */
727     if (This->baseShader.is_compiled) {
728         vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
729
730         if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
731            memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
732
733             /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
734              * we have to recompile, but we have to take a deeper look at see if the attribs that differ
735              * are declared in the decl and used in the shader
736              */
737             if(swizzled_attribs_differ(This, vdecl)) {
738                 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
739                 goto recompile;
740             }
741             WARN("Swizzled attribute validation required an expensive comparison\n");
742         }
743
744         return WINED3D_OK;
745
746         recompile:
747         if(This->recompile_count < 50) {
748             This->recompile_count++;
749         } else {
750             FIXME("Vertexshader %p recompiled more than 50 times\n", This);
751         }
752
753         deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
754     }
755
756     /* We don't need to compile */
757     if (!function) {
758         This->baseShader.is_compiled = TRUE;
759         return WINED3D_OK;
760     }
761
762     /* Generate the HW shader */
763     TRACE("(%p) : Generating hardware program\n", This);
764     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
765
766     This->baseShader.is_compiled = TRUE;
767
768     return WINED3D_OK;
769 }
770
/* Method table for the vertex shader object. The initializer is positional,
 * so the entries must stay in the order in which IWineD3DVertexShaderVtbl
 * declares its members (declared outside this file section); they are
 * grouped below by the interface that introduces them. */
771 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
772 {
773     /*** IUnknown methods ***/
774     IWineD3DVertexShaderImpl_QueryInterface,
775     IWineD3DVertexShaderImpl_AddRef,
776     IWineD3DVertexShaderImpl_Release,
777     /*** IWineD3DBase methods ***/
778     IWineD3DVertexShaderImpl_GetParent,
779     /*** IWineD3DBaseShader methods ***/
780     IWineD3DVertexShaderImpl_SetFunction,
781     IWineD3DVertexShaderImpl_CompileShader,
782     /*** IWineD3DVertexShader methods ***/
783     IWineD3DVertexShaderImpl_GetDevice,
784     IWineD3DVertexShaderImpl_GetFunction,
785     IWineD3DVertexShaderImpl_FakeSemantics,
    /* The "IWIne" capitalisation below matches the spelling used in the
     * function's definition earlier in this file. */
786     IWIneD3DVertexShaderImpl_SetLocalConstantsF
787 };