comdlg32: DPRINTF -> TRACE.
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24
25 #include "config.h"
26
27 #include <math.h>
28 #include <stdio.h>
29
30 #include "wined3d_private.h"
31
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
33
/* Expands to the gl_info of the adapter belonging to the device that owns the shader.
 * A variable named This, with a baseShader.device member, must be in scope wherever
 * this macro (or a macro built on top of it) is used. */
#define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
35
/* Shader debugging - change the "#if 0" below to "#if 1" to enable debug tracing
 * of software vertex shaders. While disabled, both macros expand to nothing.
 *
 * VSTRACE(A)           forwards the parenthesized argument list A to TRACE.
 * TRACE_VSVECTOR(name) traces the x, y, z and w members of the vector `name`.
 */
#if 0 /* Must not be 1 in cvs version */
# define VSTRACE(A) TRACE A
# define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
#else
# define VSTRACE(A)
# define TRACE_VSVECTOR(name)
#endif
45
46 /**
47  * DirectX9 SDK download
48  *  http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
49  *
50  * Exploring D3DX
51  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
52  *
53  * Using Vertex Shaders
54  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
55  *
56  * Dx9 New
57  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
58  *
59  * Dx9 Shaders
60  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
64  *
65  * Dx9 D3DX
66  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
67  *
68  * FVF
69  *  http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
70  *
71  * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72  *  http://developer.nvidia.com/view.asp?IO=vstovp
73  *
74  * NVIDIA: Memory Management with VAR
75  *  http://developer.nvidia.com/view.asp?IO=var_memory_management
76  */
77
/* TODO: Vertex and pixel shaders are almost identical; the only exceptions are the way that some of the
 * data is looked up and the availability of some of the data, i.e. some instructions are only valid for
 * pshaders and some for vshaders. Because of this the bulk of the software pipeline can be shared between
 * pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross-compiled using a
 * large body of shared code. */
80
/* Sentinel (a non-NULL, non-string pointer value) stored in the third column of some
 * SHADER_OPCODE entries in place of an instruction name.
 * NOTE(review): presumably marks opcodes that can only be emitted through GLSL - confirm
 * against the code that consumes this column. */
#define GLNAME_REQUIRE_GLSL  ((const char *)1)
82
/* Opcode table for vertex shaders, terminated by an all-zero / NULL entry.
 *
 * NOTE(review): the SHADER_OPCODE layout is declared outside this file. Judging by the
 * entries, the columns appear to be: opcode, D3D mnemonic, GL (ARB) instruction name,
 * two operand-count fields, ARB handler, GLSL handler, minimum shader version and
 * maximum shader version - confirm against the SHADER_OPCODE declaration.
 */
CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
    /* This table is not order or position dependent. */

    /* Arithmetic */
    {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, vshader_hw_map2gl,   NULL, 0, 0},
    {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, vshader_hw_map2gl,   shader_glsl_mov, 0, 0},
    {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, vshader_hw_map2gl,   shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
    {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
    {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, vshader_hw_map2gl,   shader_glsl_mad, 0, 0},
    {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
    {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
    {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rsq, 0, 0},
    {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
    {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
    {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
    {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
    {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, vshader_hw_map2gl,   shader_glsl_expp, 0, 0},
    {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, vshader_hw_map2gl,   shader_glsl_lit, 0, 0},
    {WINED3DSIO_DST,    "dst",  "DST", 1, 3, vshader_hw_map2gl,   shader_glsl_dst, 0, 0},
    {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
    {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_POW,    "pow",  "POW", 1, 3, vshader_hw_map2gl,   shader_glsl_pow, 0, 0},
    {WINED3DSIO_CRS,    "crs",  "XPD", 1, 3, vshader_hw_map2gl,   shader_glsl_cross, 0, 0},
    /* TODO: sgn can possibly be performed as
     *   RCP tmp, vec
     *   MUL out, tmp, vec */
    {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
    /* sincos appears twice: once for shader versions 2.0 - 2.1 and once for 3.0 and later */
    {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
    {WINED3DSIO_SINCOS, "sincos",  "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
    /* Matrix */
    {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    /* Declare registers */
    {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
    /* Constant definitions */
    {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
    {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
    {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
    /* Flow control - requires GLSL or software shaders */
    {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
    {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
    {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
    {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
    {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},

    {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
    {WINED3DSIO_TEXLDL, "texldl",   NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
    /* End-of-table marker */
    {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
};
153
154 static void vshader_set_limits(
155       IWineD3DVertexShaderImpl *This) {
156
157       This->baseShader.limits.texcoord = 0;
158       This->baseShader.limits.attributes = 16;
159       This->baseShader.limits.packed_input = 0;
160
161       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
162       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
163
164       switch (This->baseShader.hex_version) {
165           case WINED3DVS_VERSION(1,0):
166           case WINED3DVS_VERSION(1,1):
167                    This->baseShader.limits.temporary = 12;
168                    This->baseShader.limits.constant_bool = 0;
169                    This->baseShader.limits.constant_int = 0;
170                    This->baseShader.limits.address = 1;
171                    This->baseShader.limits.packed_output = 0;
172                    This->baseShader.limits.sampler = 0;
173                    This->baseShader.limits.label = 0;
174                    break;
175       
176           case WINED3DVS_VERSION(2,0):
177           case WINED3DVS_VERSION(2,1):
178                    This->baseShader.limits.temporary = 12;
179                    This->baseShader.limits.constant_bool = 16;
180                    This->baseShader.limits.constant_int = 16;
181                    This->baseShader.limits.address = 1;
182                    This->baseShader.limits.packed_output = 0;
183                    This->baseShader.limits.sampler = 0;
184                    This->baseShader.limits.label = 16;
185                    break;
186
187           case WINED3DVS_VERSION(3,0):
188                    This->baseShader.limits.temporary = 32;
189                    This->baseShader.limits.constant_bool = 32;
190                    This->baseShader.limits.constant_int = 32;
191                    This->baseShader.limits.address = 1;
192                    This->baseShader.limits.packed_output = 12;
193                    This->baseShader.limits.sampler = 4;
194                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
195                    break;
196
197           default: This->baseShader.limits.temporary = 12;
198                    This->baseShader.limits.constant_bool = 16;
199                    This->baseShader.limits.constant_int = 16;
200                    This->baseShader.limits.address = 1;
201                    This->baseShader.limits.packed_output = 0;
202                    This->baseShader.limits.sampler = 0;
203                    This->baseShader.limits.label = 16;
204                    FIXME("Unrecognized vertex shader version %#x\n",
205                        This->baseShader.hex_version);
206       }
207 }
208
209 /* This is an internal function,
210  * used to create fake semantics for shaders
211  * that don't have them - d3d8 shaders where the declaration
212  * stores the register for each input
213  */
214 static void vshader_set_input(
215     IWineD3DVertexShaderImpl* This,
216     unsigned int regnum,
217     BYTE usage, BYTE usage_idx) {
218
219     /* Fake usage: set reserved bit, usage, usage_idx */
220     DWORD usage_token = (0x1 << 31) |
221         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
222
223     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
224     DWORD reg_token = (0x1 << 31) |
225         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
226
227     This->semantics_in[regnum].usage = usage_token;
228     This->semantics_in[regnum].reg = reg_token;
229 }
230
231 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
232     if (usage_idx1 != usage_idx2) return FALSE;
233     if (usage1 == usage2) return TRUE;
234     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
235     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
236
237     return FALSE;
238 }
239
240 BOOL vshader_get_input(
241     IWineD3DVertexShader* iface,
242     BYTE usage_req, BYTE usage_idx_req,
243     unsigned int* regnum) {
244
245     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
246     int i;
247
248     for (i = 0; i < MAX_ATTRIBS; i++) {
249         DWORD usage_token = This->semantics_in[i].usage;
250         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
251         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
252
253         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
254             *regnum = i;
255             return TRUE;
256         }
257     }
258     return FALSE;
259 }
260
261 BOOL vshader_input_is_color(
262     IWineD3DVertexShader* iface,
263     unsigned int regnum) {
264
265     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
266
267     DWORD usage_token = This->semantics_in[regnum].usage;
268     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
269     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
270
271     int i;
272
273     for(i = 0; i < This->num_swizzled_attribs; i++) {
274         if(This->swizzled_attribs[i].usage == usage &&
275            This->swizzled_attribs[i].idx == usage_idx) {
276             return TRUE;
277         }
278     }
279     return FALSE;
280 }
281
/* Rebuild This->swizzled_attribs and This->num_swizzled_attribs for a vertex declaration.
 *
 * 1. The current list is backed up and the array is cleared.
 * 2. Every swizzled attribute of the declaration whose usage / usage index matches an
 *    entry of This->semantics_in is appended to the new list.
 * 3. Entries of the old list whose usage / usage index is not declared at all in
 *    decl->pDeclarationWine are inserted back into the new list.
 *
 * NOTE(review): nothing visible here bounds `num` against the capacity of
 * This->swizzled_attribs - confirm that callers cannot overflow it.
 */
static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
    UINT num = 0, i, j;
    UINT numoldswizzles = This->num_swizzled_attribs;
    IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;

    DWORD usage_token, usage, usage_idx;
    BOOL found;

    /* Same element count as This->swizzled_attribs */
    attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];

    /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
    memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));

    memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);

    /* Collect the declaration's swizzled attributes that the shader actually reads.
     * Unlike vshader_get_input, zero usage tokens are not skipped here. */
    for(i = 0; i < decl->num_swizzled_attribs; i++) {
        for(j = 0; j < MAX_ATTRIBS; j++) {
            usage_token = This->semantics_in[j].usage;
            usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
            usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;

            if(decl->swizzled_attribs[i].usage == usage &&
               decl->swizzled_attribs[i].idx == usage_idx) {
                This->swizzled_attribs[num].usage = usage;
                This->swizzled_attribs[num].idx = usage_idx;
                num++;
            }
        }
    }

    /* Add previously converted attributes back in if they are not defined in the current declaration */
    for(i = 0; i < numoldswizzles; i++) {

        found = FALSE;
        for(j = 0; j < decl->declarationWNumElements; j++) {
            if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
               oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
                found = TRUE;
            }
        }
        if(found) {
            /* This previously converted attribute is declared in the current declaration. Either it is
             * already in the new array, or it should not be there. Skip it
             */
            continue;
        }
        /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
         * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
         * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
         * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
         * stays unswizzled as well because it isn't found in the oldswizzles array
         */
        /* Find the first entry that compares lower than the old one (by usage, then by idx) and
         * shift it and everything after it up by one slot. If no such entry exists the loop ends
         * with j == num and the old entry is appended. */
        for(j = 0; j < num; j++) {
            if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
               oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
               oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
                /* Moves slots j .. last-1 to j+1 .. last; the former last slot is dropped */
                memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
                         sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
                break;
            }
        }
        This->swizzled_attribs[j].usage = oldswizzles[i].usage;
        This->swizzled_attribs[j].idx = oldswizzles[i].idx;
        num++;
    }

    TRACE("New swizzled attributes array\n");
    for(i = 0; i < num; i++) {
        TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
              This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
    }
    This->num_swizzled_attribs = num;
}
355 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
356     or GLSL and send it to the card */
357 static VOID IWineD3DVertexShaderImpl_GenerateShader(
358     IWineD3DVertexShader *iface,
359     shader_reg_maps* reg_maps,
360     CONST DWORD *pFunction) {
361
362     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
363     IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
364     SHADER_BUFFER buffer;
365
366     find_swizzled_attribs(decl, This);
367
368 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
369         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
370     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
371         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
372         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
373         This->fixupVertexBufferSize = PGMSIZE;
374         This->fixupVertexBuffer[0] = 0;
375     }
376     buffer.buffer = This->device->fixupVertexBuffer;
377 #else
378     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
379 #endif
380     buffer.bsize = 0;
381     buffer.lineNo = 0;
382     buffer.newline = TRUE;
383
384     if (This->baseShader.shader_mode == SHADER_GLSL) {
385
386         /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
387         GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
388
389         /* Base Declarations */
390         shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
391
392         /* Base Shader Body */
393         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
394
395         /* Unpack 3.0 outputs */
396         if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0)) {
397             shader_addline(&buffer, "order_ps_input(OUT);\n");
398         } else {
399             shader_addline(&buffer, "order_ps_input();\n");
400         }
401
402         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
403         if (!reg_maps->fog)
404             shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
405
406         /* Write the final position.
407          *
408          * OpenGL coordinates specify the center of the pixel while d3d coords specify
409          * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
410          * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
411          * contains 1.0 to allow a mad.
412          */
413         shader_addline(&buffer, "gl_Position.xy = gl_Position.xy * posFixup.xy + posFixup.zw;\n");
414
415         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
416          *
417          * Basically we want(in homogenous coordinates) z = z * 2 - 1. However, shaders are run
418          * before the homogenous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
419          * which is the same as z = z / 2 - w.
420          */
421         shader_addline(&buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
422
423         shader_addline(&buffer, "}\n");
424
425         TRACE("Compiling shader object %u\n", shader_obj);
426         GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
427         GL_EXTCALL(glCompileShaderARB(shader_obj));
428         print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
429
430         /* Store the shader object */
431         This->baseShader.prgId = shader_obj;
432
433     } else if (This->baseShader.shader_mode == SHADER_ARB) {
434
435         /*  Create the hw ARB shader */
436         shader_addline(&buffer, "!!ARBvp1.0\n");
437         shader_addline(&buffer, "PARAM helper_const = { 2.0, -1.0, %d.0, 0.0 };\n", This->rel_offset);
438
439         /* Mesa supports only 95 constants */
440         if (GL_VEND(MESA) || GL_VEND(WINE))
441             This->baseShader.limits.constant_float = 
442                 min(95, This->baseShader.limits.constant_float);
443
444         /* Some instructions need a temporary register. Add it if needed, but only if it is really needed */
445         if(reg_maps->usesnrm || This->rel_offset) {
446             shader_addline(&buffer, "TEMP TMP;\n");
447         }
448
449         /* Base Declarations */
450         shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
451
452         /* We need a constant to fixup the final position */
453         shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
454
455         /* Base Shader Body */
456         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
457
458         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
459         if (!reg_maps->fog)
460             shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
461
462         /* Write the final position.
463          *
464          * OpenGL coordinates specify the center of the pixel while d3d coords specify
465          * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
466          * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
467          * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
468          */
469         shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, posFixup.z;\n");
470         shader_addline(&buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, posFixup.w;\n");
471
472         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
473          * and the glsl equivalent
474          */
475         shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, helper_const.x, -TMP_OUT.w;\n");
476
477         shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
478         
479         shader_addline(&buffer, "END\n"); 
480
481         /* TODO: change to resource.glObjectHandle or something like that */
482         GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
483
484         TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
485         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
486
487         TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
488         /* Create the program and check for errors */
489         GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
490             buffer.bsize, buffer.buffer));
491
492         if (glGetError() == GL_INVALID_OPERATION) {
493             GLint errPos;
494             glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
495             FIXME("HW VertexShader Error at position %d: %s\n",
496                   errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
497             This->baseShader.prgId = -1;
498         }
499     }
500
501 #if 1 /* if were using the data buffer of device then we don't need to free it */
502   HeapFree(GetProcessHeap(), 0, buffer.buffer);
503 #endif
504 }
505
506 /* *******************************************
507    IWineD3DVertexShader IUnknown parts follow
508    ******************************************* */
509 static HRESULT  WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
510     return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
511 }
512
513 static ULONG  WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
514     return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
515 }
516
517 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
518     return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
519 }
520
521 /* *******************************************
522    IWineD3DVertexShader IWineD3DVertexShader parts follow
523    ******************************************* */
524
525 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
526     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
527     
528     *parent = This->parent;
529     IUnknown_AddRef(*parent);
530     TRACE("(%p) : returning %p\n", This, *parent);
531     return WINED3D_OK;
532 }
533
534 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
535     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
536     IWineD3DDevice_AddRef(This->baseShader.device);
537     *pDevice = This->baseShader.device;
538     TRACE("(%p) returning %p\n", This, *pDevice);
539     return WINED3D_OK;
540 }
541
542 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
543     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
544     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
545
546     if (NULL == pData) {
547         *pSizeOfData = This->baseShader.functionLength;
548         return WINED3D_OK;
549     }
550     if (*pSizeOfData < This->baseShader.functionLength) {
551         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
552          * than the required size we should write the required size and
553          * return D3DERR_MOREDATA. That's not actually true. */
554         return WINED3DERR_INVALIDCALL;
555     }
556     if (NULL == This->baseShader.function) { /* no function defined */
557         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
558         (*(DWORD **) pData) = NULL;
559     } else {
560         if(This->baseShader.functionLength == 0){
561
562         }
563         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
564         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
565     }
566     return WINED3D_OK;
567 }
568
569 /* Note that for vertex shaders CompileShader isn't called until the
570  * shader is first used. The reason for this is that we need the vertex
571  * declaration the shader will be used with in order to determine if
572  * the data in a register is of type D3DCOLOR, and needs swizzling. */
573 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
574
575     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
576     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
577     HRESULT hr;
578     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
579
580     TRACE("(%p) : pFunction %p\n", iface, pFunction);
581
582     /* First pass: trace shader */
583     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
584     vshader_set_limits(This);
585
586     /* Initialize immediate constant lists */
587     list_init(&This->baseShader.constantsF);
588     list_init(&This->baseShader.constantsB);
589     list_init(&This->baseShader.constantsI);
590
591     /* Second pass: figure out registers used, semantics, etc.. */
592     This->min_rel_offset = GL_LIMITS(vshader_constantsF);
593     This->max_rel_offset = 0;
594     memset(reg_maps, 0, sizeof(shader_reg_maps));
595     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
596        This->semantics_in, This->semantics_out, pFunction, NULL);
597     if (hr != WINED3D_OK) return hr;
598
599     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
600
601     if(deviceImpl->vs_selected_mode == SHADER_ARB &&
602        (GLINFO_LOCATION).arb_vs_offset_limit      &&
603        This->min_rel_offset <= This->max_rel_offset) {
604
605         if(This->max_rel_offset - This->min_rel_offset > 127) {
606             FIXME("The difference between the minimum and maximum relative offset is > 127\n");
607             FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
608             FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
609         } else if(This->max_rel_offset - This->min_rel_offset > 63) {
610             This->rel_offset = This->min_rel_offset + 63;
611         } else if(This->max_rel_offset > 63) {
612             This->rel_offset = This->min_rel_offset;
613         } else {
614             This->rel_offset = 0;
615         }
616     }
617     This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
618
619     /* copy the function ... because it will certainly be released by application */
620     if (NULL != pFunction) {
621         void *function;
622
623         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
624         if (!function) return E_OUTOFMEMORY;
625         memcpy(function, pFunction, This->baseShader.functionLength);
626         This->baseShader.function = function;
627     } else {
628         This->baseShader.function = NULL;
629     }
630
631     return WINED3D_OK;
632 }
633
634 /* Preload semantics for d3d8 shaders */
635 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
636     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
637     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
638
639     int i;
640     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
641         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
642         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
643     }
644 }
645
646 /* Set local constants for d3d8 shaders */
647 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
648         UINT start_idx, const float *src_data, UINT count) {
649     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
650     UINT i, end_idx;
651
652     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
653
654     end_idx = start_idx + count;
655     if (end_idx > GL_LIMITS(vshader_constantsF)) {
656         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
657         end_idx = GL_LIMITS(vshader_constantsF);
658     }
659
660     for (i = start_idx; i < end_idx; ++i) {
661         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
662         if (!lconst) return E_OUTOFMEMORY;
663
664         lconst->idx = i;
665         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
666         list_add_head(&This->baseShader.constantsF, &lconst->entry);
667     }
668
669     return WINED3D_OK;
670 }
671
672 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
673     UINT i, j, k;
674     BOOL found;
675
676     DWORD usage_token;
677     DWORD usage;
678     DWORD usage_idx;
679
680     for(i = 0; i < vdecl->declarationWNumElements; i++) {
681         for(j = 0; j < MAX_ATTRIBS; j++) {
682             if(!This->baseShader.reg_maps.attributes) continue;
683
684             usage_token = This->semantics_in[j].usage;
685             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
686             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
687
688             if(vdecl->pDeclarationWine[i].Usage != usage ||
689                vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
690                 continue;
691             }
692
693             found = FALSE;
694             for(k = 0; k < This->num_swizzled_attribs; k++) {
695                 if(This->swizzled_attribs[k].usage == usage &&
696                     This->swizzled_attribs[k].idx == usage_idx) {
697                     found = TRUE;
698                 }
699             }
700             if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
701                 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
702                       debug_d3ddeclusage(usage), usage_idx);
703                 return TRUE;
704             }
705             if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
706                 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
707                       debug_d3ddeclusage(usage), usage_idx);
708                 return TRUE;
709             }
710         }
711     }
712     return FALSE;
713 }
714
715 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
716     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
717     IWineD3DVertexDeclarationImpl *vdecl;
718     CONST DWORD *function = This->baseShader.function;
719     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
720
721     TRACE("(%p) : function %p\n", iface, function);
722
723     /* We're already compiled. */
724     if (This->baseShader.is_compiled) {
725         vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
726
727         if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
728            memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
729
730             /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
731              * we have to recompile, but we have to take a deeper look at see if the attribs that differ
732              * are declared in the decl and used in the shader
733              */
734             if(swizzled_attribs_differ(This, vdecl)) {
735                 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
736                 goto recompile;
737             }
738             WARN("Swizzled attribute validation required an expensive comparison\n");
739         }
740
741         return WINED3D_OK;
742
743         recompile:
744         if(This->recompile_count < 50) {
745             This->recompile_count++;
746         } else {
747             FIXME("Vertexshader %p recompiled more than 50 times\n", This);
748         }
749
750         deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
751     }
752
753     /* We don't need to compile */
754     if (!function) {
755         This->baseShader.is_compiled = TRUE;
756         return WINED3D_OK;
757     }
758
759     /* Generate the HW shader */
760     TRACE("(%p) : Generating hardware program\n", This);
761     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
762
763     This->baseShader.is_compiled = TRUE;
764
765     return WINED3D_OK;
766 }
767
768 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
769 {
770     /*** IUnknown methods ***/
771     IWineD3DVertexShaderImpl_QueryInterface,
772     IWineD3DVertexShaderImpl_AddRef,
773     IWineD3DVertexShaderImpl_Release,
774     /*** IWineD3DBase methods ***/
775     IWineD3DVertexShaderImpl_GetParent,
776     /*** IWineD3DBaseShader methods ***/
777     IWineD3DVertexShaderImpl_SetFunction,
778     IWineD3DVertexShaderImpl_CompileShader,
779     /*** IWineD3DVertexShader methods ***/
780     IWineD3DVertexShaderImpl_GetDevice,
781     IWineD3DVertexShaderImpl_GetFunction,
782     IWineD3DVertexShaderImpl_FakeSemantics,
783     IWIneD3DVertexShaderImpl_SetLocalConstantsF
784 };