shell32: Constify some variables.
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24
25 #include "config.h"
26
27 #include <math.h>
28 #include <stdio.h>
29
30 #include "wined3d_private.h"
31
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
33
34 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
35
36 /* Shader debugging - Change the following line to enable debugging of software
37       vertex shaders                                                             */
38 #if 0 /* Musxt not be 1 in cvs version */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
41 #else
42 # define VSTRACE(A)
43 # define TRACE_VSVECTOR(name)
44 #endif
45
46 /**
47  * DirectX9 SDK download
48  *  http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
49  *
50  * Exploring D3DX
51  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
52  *
53  * Using Vertex Shaders
54  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
55  *
56  * Dx9 New
57  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
58  *
59  * Dx9 Shaders
60  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
64  *
65  * Dx9 D3DX
66  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
67  *
68  * FVF
69  *  http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
70  *
71  * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72  *  http://developer.nvidia.com/view.asp?IO=vstovp
73  *
74  * NVIDIA: Memory Management with VAR
75  *  http://developer.nvidia.com/view.asp?IO=var_memory_management
76  */
77
/* TODO: Vertex and pixel shaders are almost identical; the only exception is the way that some of the data is looked up, or the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders.
Because of this, the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross-compiled using a large body of shared code. */
80
81 #define GLNAME_REQUIRE_GLSL  ((const char *)1)
82
83 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
84     /* This table is not order or position dependent. */
85
86     /* Arithmetic */
87     {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, vshader_hw_map2gl,   NULL, 0, 0},
88     {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, vshader_hw_map2gl,   shader_glsl_mov, 0, 0},
89     {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, vshader_hw_map2gl,   shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
90     {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
91     {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
92     {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, vshader_hw_map2gl,   shader_glsl_mad, 0, 0},
93     {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
94     {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
95     {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rsq, 0, 0},
96     {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
97     {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
98     {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
99     {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
100     {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
101     {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
102     {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
103     {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
104     {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
105     {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, vshader_hw_map2gl,   shader_glsl_expp, 0, 0},
106     {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
107     {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, vshader_hw_map2gl,   shader_glsl_lit, 0, 0},
108     {WINED3DSIO_DST,    "dst",  "DST", 1, 3, vshader_hw_map2gl,   shader_glsl_dst, 0, 0},
109     {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
110     {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
111     {WINED3DSIO_POW,    "pow",  "POW", 1, 3, vshader_hw_map2gl,   shader_glsl_pow, 0, 0},
112     {WINED3DSIO_CRS,    "crs",  "XPD", 1, 3, vshader_hw_map2gl,   shader_glsl_cross, 0, 0},
113     /* TODO: sng can possibly be performed a  s
114         RCP tmp, vec
115         MUL out, tmp, vec*/
116     {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
117     {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
118     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
119     {WINED3DSIO_SINCOS, "sincos",  "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
120     /* Matrix */
121     {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
122     {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
123     {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
124     {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
125     {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
126     /* Declare registers */
127     {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
128     /* Constant definitions */
129     {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
130     {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
131     {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
132     /* Flow control - requires GLSL or software shaders */
133     {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
134     {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
135     {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
136     {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
137     {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
138     {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
139     {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
140     {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
141     {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
142     {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
143     {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
144     {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
145     {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
146     {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
147     {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},
148
149     {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
150     {WINED3DSIO_TEXLDL, "texldl",   NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
151     {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
152 };
153
154 static void vshader_set_limits(
155       IWineD3DVertexShaderImpl *This) {
156
157       This->baseShader.limits.texcoord = 0;
158       This->baseShader.limits.attributes = 16;
159       This->baseShader.limits.packed_input = 0;
160
161       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
162       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
163
164       switch (This->baseShader.hex_version) {
165           case WINED3DVS_VERSION(1,0):
166           case WINED3DVS_VERSION(1,1):
167                    This->baseShader.limits.temporary = 12;
168                    This->baseShader.limits.constant_bool = 0;
169                    This->baseShader.limits.constant_int = 0;
170                    This->baseShader.limits.address = 1;
171                    This->baseShader.limits.packed_output = 0;
172                    This->baseShader.limits.sampler = 0;
173                    This->baseShader.limits.label = 0;
174                    break;
175       
176           case WINED3DVS_VERSION(2,0):
177           case WINED3DVS_VERSION(2,1):
178                    This->baseShader.limits.temporary = 12;
179                    This->baseShader.limits.constant_bool = 16;
180                    This->baseShader.limits.constant_int = 16;
181                    This->baseShader.limits.address = 1;
182                    This->baseShader.limits.packed_output = 0;
183                    This->baseShader.limits.sampler = 0;
184                    This->baseShader.limits.label = 16;
185                    break;
186
187           case WINED3DVS_VERSION(3,0):
188                    This->baseShader.limits.temporary = 32;
189                    This->baseShader.limits.constant_bool = 32;
190                    This->baseShader.limits.constant_int = 32;
191                    This->baseShader.limits.address = 1;
192                    This->baseShader.limits.packed_output = 12;
193                    This->baseShader.limits.sampler = 4;
194                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
195                    break;
196
197           default: This->baseShader.limits.temporary = 12;
198                    This->baseShader.limits.constant_bool = 16;
199                    This->baseShader.limits.constant_int = 16;
200                    This->baseShader.limits.address = 1;
201                    This->baseShader.limits.packed_output = 0;
202                    This->baseShader.limits.sampler = 0;
203                    This->baseShader.limits.label = 16;
204                    FIXME("Unrecognized vertex shader version %#x\n",
205                        This->baseShader.hex_version);
206       }
207 }
208
209 /* This is an internal function,
210  * used to create fake semantics for shaders
211  * that don't have them - d3d8 shaders where the declaration
212  * stores the register for each input
213  */
214 static void vshader_set_input(
215     IWineD3DVertexShaderImpl* This,
216     unsigned int regnum,
217     BYTE usage, BYTE usage_idx) {
218
219     /* Fake usage: set reserved bit, usage, usage_idx */
220     DWORD usage_token = (0x1 << 31) |
221         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
222
223     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
224     DWORD reg_token = (0x1 << 31) |
225         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
226
227     This->semantics_in[regnum].usage = usage_token;
228     This->semantics_in[regnum].reg = reg_token;
229 }
230
231 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
232     if (usage_idx1 != usage_idx2) return FALSE;
233     if (usage1 == usage2) return TRUE;
234     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
235     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
236
237     return FALSE;
238 }
239
240 BOOL vshader_get_input(
241     IWineD3DVertexShader* iface,
242     BYTE usage_req, BYTE usage_idx_req,
243     unsigned int* regnum) {
244
245     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
246     int i;
247
248     for (i = 0; i < MAX_ATTRIBS; i++) {
249         DWORD usage_token = This->semantics_in[i].usage;
250         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
251         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
252
253         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
254             *regnum = i;
255             return TRUE;
256         }
257     }
258     return FALSE;
259 }
260
261 BOOL vshader_input_is_color(
262     IWineD3DVertexShader* iface,
263     unsigned int regnum) {
264
265     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
266     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
267     IWineD3DVertexDeclarationImpl *vertexDeclaration = (IWineD3DVertexDeclarationImpl *)deviceImpl->stateBlock->vertexDecl;
268
269     DWORD usage_token = This->semantics_in[regnum].usage;
270     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
271     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
272
273     if (vertexDeclaration) {
274         int i;
275         /* Find the declaration element that matches our register, then check
276          * if it has D3DCOLOR as it's type. This works for both d3d8 and d3d9. */
277         for (i = 0; i < vertexDeclaration->declarationWNumElements-1; ++i) {
278             WINED3DVERTEXELEMENT *element = vertexDeclaration->pDeclarationWine + i;
279             if (match_usage(element->Usage, element->UsageIndex, usage, usage_idx)) {
280                 return element->Type == WINED3DDECLTYPE_D3DCOLOR;
281             }
282         }
283     }
284
285     ERR("Either no vertexdeclaration present, or register not matched. This should never happen.\n");
286     return FALSE;
287 }
288
289 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
290     or GLSL and send it to the card */
291 static VOID IWineD3DVertexShaderImpl_GenerateShader(
292     IWineD3DVertexShader *iface,
293     shader_reg_maps* reg_maps,
294     CONST DWORD *pFunction) {
295
296     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
297     SHADER_BUFFER buffer;
298
299 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
300         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
301     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
302         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
303         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
304         This->fixupVertexBufferSize = PGMSIZE;
305         This->fixupVertexBuffer[0] = 0;
306     }
307     buffer.buffer = This->device->fixupVertexBuffer;
308 #else
309     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
310 #endif
311     buffer.bsize = 0;
312     buffer.lineNo = 0;
313     buffer.newline = TRUE;
314
315     if (This->baseShader.shader_mode == SHADER_GLSL) {
316
317         /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
318         GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
319
320         /* Base Declarations */
321         shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
322
323         /* Base Shader Body */
324         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
325
326         /* Unpack 3.0 outputs */
327         if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0))
328             vshader_glsl_output_unpack(&buffer, This->semantics_out);
329
330         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
331         if (!reg_maps->fog)
332             shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
333         
334         /* Write the final position.
335          *
336          * OpenGL coordinates specify the center of the pixel while d3d coords specify
337          * the corner. The offsets are stored in z and w in the 2nd row of the projection
338          * matrix to avoid wasting a free shader constant. Add them to the w and z coord
339          * of the 2nd row
340          */
341         shader_addline(&buffer, "gl_Position.x = gl_Position.x + posFixup[2];\n");
342         shader_addline(&buffer, "gl_Position.y = gl_Position.y + posFixup[3];\n");
343         /* Account for any inverted textures (render to texture case) by reversing the y coordinate
344          *  (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices)
345          */
346         shader_addline(&buffer, "gl_Position.y = gl_Position.y * posFixup[1];\n");
347         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
348          *
349          * Basically we want(in homogenous coordinates) z = z * 2 - 1. However, shaders are run
350          * before the homogenous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
351          * which is the same as z = z / 2 - w.
352          */
353         shader_addline(&buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
354
355         shader_addline(&buffer, "}\n");
356
357         TRACE("Compiling shader object %u\n", shader_obj);
358         GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
359         GL_EXTCALL(glCompileShaderARB(shader_obj));
360         print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
361
362         /* Store the shader object */
363         This->baseShader.prgId = shader_obj;
364
365     } else if (This->baseShader.shader_mode == SHADER_ARB) {
366
367         /*  Create the hw ARB shader */
368         shader_addline(&buffer, "!!ARBvp1.0\n");
369         shader_addline(&buffer, "PARAM zfixup = { 2.0, -1.0, 0.0, 0.0 };\n");
370
371         /* Mesa supports only 95 constants */
372         if (GL_VEND(MESA) || GL_VEND(WINE))
373             This->baseShader.limits.constant_float = 
374                 min(95, This->baseShader.limits.constant_float);
375
376         /* Some instructions need a temporary register. Add it if needed, but only if it is really needed */
377         if(reg_maps->usesnrm) {
378             shader_addline(&buffer, "TEMP TMP;\n");
379         }
380
381         /* Base Declarations */
382         shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
383
384         /* We need a constant to fixup the final position */
385         shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
386
387         /* Base Shader Body */
388         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
389
390         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
391         if (!reg_maps->fog)
392             shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
393
394         /* Write the final position.
395          *
396          * OpenGL coordinates specify the center of the pixel while d3d coords specify
397          * the corner. The offsets are stored in the 2nd row of the projection matrix,
398          * the x offset in z and the y offset in w. Add them to the resulting position
399          */
400         shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, posFixup.z;\n");
401         shader_addline(&buffer, "ADD TMP_OUT.y, TMP_OUT.y, posFixup.w;\n");
402         /* Account for any inverted textures (render to texture case) by reversing the y coordinate
403          *  (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices)
404          */
405         shader_addline(&buffer, "MUL TMP_OUT.y, TMP_OUT.y, posFixup.y;\n");
406         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
407          * and the glsl equivalent
408          */
409         shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, zfixup.x, -TMP_OUT.w;\n");
410
411         shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
412         
413         shader_addline(&buffer, "END\n"); 
414
415         /* TODO: change to resource.glObjectHandle or something like that */
416         GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
417
418         TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
419         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
420
421         TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
422         /* Create the program and check for errors */
423         GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
424             buffer.bsize, buffer.buffer));
425
426         if (glGetError() == GL_INVALID_OPERATION) {
427             GLint errPos;
428             glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
429             FIXME("HW VertexShader Error at position %d: %s\n",
430                   errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
431             This->baseShader.prgId = -1;
432         }
433     }
434
435 #if 1 /* if were using the data buffer of device then we don't need to free it */
436   HeapFree(GetProcessHeap(), 0, buffer.buffer);
437 #endif
438 }
439
440 /* *******************************************
441    IWineD3DVertexShader IUnknown parts follow
442    ******************************************* */
443 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
444 {
445     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
446     TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
447     if (IsEqualGUID(riid, &IID_IUnknown) 
448         || IsEqualGUID(riid, &IID_IWineD3DBase)
449         || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
450         || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
451         IUnknown_AddRef(iface);
452         *ppobj = This;
453         return S_OK;
454     }
455     *ppobj = NULL;
456     return E_NOINTERFACE;
457 }
458
459 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
460     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
461     TRACE("(%p) : AddRef increasing from %d\n", This, This->ref);
462     return InterlockedIncrement(&This->ref);
463 }
464
465 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
466     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
467     ULONG ref;
468     TRACE("(%p) : Releasing from %d\n", This, This->ref);
469     ref = InterlockedDecrement(&This->ref);
470     if (ref == 0) {
471         if (This->baseShader.shader_mode == SHADER_GLSL && This->baseShader.prgId != 0) {
472             struct list *linked_programs = &This->baseShader.linked_programs;
473
474             TRACE("Deleting linked programs\n");
475             if (linked_programs->next) {
476                 struct glsl_shader_prog_link *entry, *entry2;
477                 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
478                     delete_glsl_program_entry(This->baseShader.device, entry);
479                 }
480             }
481
482             TRACE("Deleting shader object %u\n", This->baseShader.prgId);
483             GL_EXTCALL(glDeleteObjectARB(This->baseShader.prgId));
484             checkGLcall("glDeleteObjectARB");
485         }
486         shader_delete_constant_list(&This->baseShader.constantsF);
487         shader_delete_constant_list(&This->baseShader.constantsB);
488         shader_delete_constant_list(&This->baseShader.constantsI);
489         HeapFree(GetProcessHeap(), 0, This);
490
491     }
492     return ref;
493 }
494
495 /* *******************************************
496    IWineD3DVertexShader IWineD3DVertexShader parts follow
497    ******************************************* */
498
499 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
500     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
501     
502     *parent = This->parent;
503     IUnknown_AddRef(*parent);
504     TRACE("(%p) : returning %p\n", This, *parent);
505     return WINED3D_OK;
506 }
507
508 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
509     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
510     IWineD3DDevice_AddRef(This->baseShader.device);
511     *pDevice = This->baseShader.device;
512     TRACE("(%p) returning %p\n", This, *pDevice);
513     return WINED3D_OK;
514 }
515
516 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
517     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
518     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
519
520     if (NULL == pData) {
521         *pSizeOfData = This->baseShader.functionLength;
522         return WINED3D_OK;
523     }
524     if (*pSizeOfData < This->baseShader.functionLength) {
525         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
526          * than the required size we should write the required size and
527          * return D3DERR_MOREDATA. That's not actually true. */
528         return WINED3DERR_INVALIDCALL;
529     }
530     if (NULL == This->baseShader.function) { /* no function defined */
531         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
532         (*(DWORD **) pData) = NULL;
533     } else {
534         if(This->baseShader.functionLength == 0){
535
536         }
537         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
538         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
539     }
540     return WINED3D_OK;
541 }
542
543 /* Note that for vertex shaders CompileShader isn't called until the
544  * shader is first used. The reason for this is that we need the vertex
545  * declaration the shader will be used with in order to determine if
546  * the data in a register is of type D3DCOLOR, and needs swizzling. */
547 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
548
549     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
550     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
551     HRESULT hr;
552     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
553
554     TRACE("(%p) : pFunction %p\n", iface, pFunction);
555
556     /* First pass: trace shader */
557     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
558     vshader_set_limits(This);
559
560     /* Initialize immediate constant lists */
561     list_init(&This->baseShader.constantsF);
562     list_init(&This->baseShader.constantsB);
563     list_init(&This->baseShader.constantsI);
564
565     /* Second pass: figure out registers used, semantics, etc.. */
566     memset(reg_maps, 0, sizeof(shader_reg_maps));
567     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
568        This->semantics_in, This->semantics_out, pFunction, NULL);
569     if (hr != WINED3D_OK) return hr;
570
571     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
572
573     /* copy the function ... because it will certainly be released by application */
574     if (NULL != pFunction) {
575         void *function;
576
577         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
578         if (!function) return E_OUTOFMEMORY;
579         memcpy(function, pFunction, This->baseShader.functionLength);
580         This->baseShader.function = function;
581     } else {
582         This->baseShader.function = NULL;
583     }
584
585     return WINED3D_OK;
586 }
587
588 /* Preload semantics for d3d8 shaders */
589 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
590     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
591     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
592
593     int i;
594     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
595         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
596         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
597     }
598 }
599
600 /* Set local constants for d3d8 shaders */
601 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
602         UINT start_idx, const float *src_data, UINT count) {
603     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
604     UINT i, end_idx;
605
606     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
607
608     end_idx = start_idx + count;
609     if (end_idx > GL_LIMITS(vshader_constantsF)) {
610         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
611         end_idx = GL_LIMITS(vshader_constantsF);
612     }
613
614     for (i = start_idx; i < end_idx; ++i) {
615         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
616         if (!lconst) return E_OUTOFMEMORY;
617
618         lconst->idx = i;
619         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
620         list_add_head(&This->baseShader.constantsF, &lconst->entry);
621     }
622
623     return WINED3D_OK;
624 }
625
626 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
627     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
628     CONST DWORD *function = This->baseShader.function;
629
630     TRACE("(%p) : function %p\n", iface, function);
631
632     /* We're already compiled. */
633     if (This->baseShader.is_compiled) return WINED3D_OK;
634
635     /* We don't need to compile */
636     if (!function) {
637         This->baseShader.is_compiled = TRUE;
638         return WINED3D_OK;
639     }
640
641     /* Generate the HW shader */
642     TRACE("(%p) : Generating hardware program\n", This);
643     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
644
645     This->baseShader.is_compiled = TRUE;
646
647     return WINED3D_OK;
648 }
649
650 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
651 {
652     /*** IUnknown methods ***/
653     IWineD3DVertexShaderImpl_QueryInterface,
654     IWineD3DVertexShaderImpl_AddRef,
655     IWineD3DVertexShaderImpl_Release,
656     /*** IWineD3DBase methods ***/
657     IWineD3DVertexShaderImpl_GetParent,
658     /*** IWineD3DBaseShader methods ***/
659     IWineD3DVertexShaderImpl_SetFunction,
660     IWineD3DVertexShaderImpl_CompileShader,
661     /*** IWineD3DVertexShader methods ***/
662     IWineD3DVertexShaderImpl_GetDevice,
663     IWineD3DVertexShaderImpl_GetFunction,
664     IWineD3DVertexShaderImpl_FakeSemantics,
665     IWIneD3DVertexShaderImpl_SetLocalConstantsF
666 };