wined3d: Implement DDBLT_DEPTHFILL on the active depth stencil target.
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24
25 #include "config.h"
26
27 #include <math.h>
28 #include <stdio.h>
29
30 #include "wined3d_private.h"
31
/* Selects the d3d_shader debug channel for the TRACE/WARN/FIXME/ERR calls in this file. */
WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);

/* Expands to the gl_info member of the wined3d object that owns the shader's
 * device. It dereferences a variable literally named "This", so code relying
 * on this macro needs a "This" with a baseShader.device member in scope. */
#define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->baseShader.device)->wineD3D))->gl_info

/* Shader debugging - Change the following line to enable debugging of software
      vertex shaders                                                             */
#if 0 /* Must not be 1 in cvs version */
/* VSTRACE takes a fully parenthesized TRACE argument list: VSTRACE(("fmt", args)) */
# define VSTRACE(A) TRACE A
/* Traces the x/y/z/w members of a vector, prefixed with the variable's name */
# define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
#else
/* Debugging disabled: both macros expand to nothing */
# define VSTRACE(A)
# define TRACE_VSVECTOR(name)
#endif
45
46 /**
47  * DirectX9 SDK download
48  *  http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
49  *
50  * Exploring D3DX
51  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
52  *
53  * Using Vertex Shaders
54  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
55  *
56  * Dx9 New
57  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
58  *
59  * Dx9 Shaders
60  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
64  *
65  * Dx9 D3DX
66  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
67  *
68  * FVF
69  *  http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
70  *
71  * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72  *  http://developer.nvidia.com/view.asp?IO=vstovp
73  *
74  * NVIDIA: Memory Management with VAR
75  *  http://developer.nvidia.com/view.asp?IO=var_memory_management
76  */
77
/* TODO: Vertex and pixel shaders are almost identical, the only exception being the way that some of the data is looked up or the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders.
Because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross compiled using a large body of shared code. */
80
81 #define GLNAME_REQUIRE_GLSL  ((const char *)1)
82
83 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
84     /* This table is not order or position dependent. */
85
86     /* Arithmetic */
87     {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, vshader_hw_map2gl,   NULL, 0, 0},
88     {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, vshader_hw_map2gl,   shader_glsl_mov, 0, 0},
89     {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, vshader_hw_map2gl,   shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
90     {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
91     {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
92     {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, vshader_hw_map2gl,   shader_glsl_mad, 0, 0},
93     {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
94     {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
95     {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_map2gl, 0, 0},
96     {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
97     {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
98     {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
99     {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
100     {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
101     {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
102     {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
103     {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
104     {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
105     {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, vshader_hw_map2gl,   shader_glsl_expp, 0, 0},
106     {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
107     {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, vshader_hw_map2gl,   shader_glsl_lit, 0, 0},
108     {WINED3DSIO_DST,    "dst",  "DST", 1, 3, vshader_hw_map2gl,   shader_glsl_dst, 0, 0},
109     {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
110     {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
111     {WINED3DSIO_POW,    "pow",  "POW", 1, 3, NULL,                shader_glsl_map2gl, 0, 0},
112     {WINED3DSIO_CRS,    "crs",  "XPS", 1, 3, NULL,                shader_glsl_cross, 0, 0},
113     /* TODO: sng can possibly be performed a  s
114         RCP tmp, vec
115         MUL out, tmp, vec*/
116     {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
117     /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
118         DP3 tmp , vec, vec;
119         RSQ tmp, tmp.x;
120         MUL vec.xyz, vec, tmp;
121     but I think this is better because it accounts for w properly.
122         DP3 tmp , vec, vec;
123         RSQ tmp, tmp.x;
124         MUL vec, vec, tmp;
125     */
126     {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
127     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, NULL, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
128     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 2, NULL, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
129     /* Matrix */
130     {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
131     {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
132     {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
133     {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
134     {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
135     /* Declare registers */
136     {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
137     /* Constant definitions */
138     {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
139     {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
140     {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
141     /* Flow control - requires GLSL or software shaders */
142     {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
143     {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
144     {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
145     {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
146     {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
147     {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
148     {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
149     {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
150     {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
151     {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
152     {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
153     {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
154     {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
155     {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
156     {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},
157
158     {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
159     {WINED3DSIO_TEXLDL, "texdl",    GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
160     {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
161 };
162
163 static void vshader_set_limits(
164       IWineD3DVertexShaderImpl *This) {
165
166       This->baseShader.limits.texcoord = 0;
167       This->baseShader.limits.attributes = 16;
168       This->baseShader.limits.packed_input = 0;
169
170       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
171       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
172
173       switch (This->baseShader.hex_version) {
174           case WINED3DVS_VERSION(1,0):
175           case WINED3DVS_VERSION(1,1):
176                    This->baseShader.limits.temporary = 12;
177                    This->baseShader.limits.constant_bool = 0;
178                    This->baseShader.limits.constant_int = 0;
179                    This->baseShader.limits.address = 1;
180                    This->baseShader.limits.packed_output = 0;
181                    This->baseShader.limits.sampler = 0;
182                    This->baseShader.limits.label = 0;
183                    break;
184       
185           case WINED3DVS_VERSION(2,0):
186           case WINED3DVS_VERSION(2,1):
187                    This->baseShader.limits.temporary = 12;
188                    This->baseShader.limits.constant_bool = 16;
189                    This->baseShader.limits.constant_int = 16;
190                    This->baseShader.limits.address = 1;
191                    This->baseShader.limits.packed_output = 0;
192                    This->baseShader.limits.sampler = 0;
193                    This->baseShader.limits.label = 16;
194                    break;
195
196           case WINED3DVS_VERSION(3,0):
197                    This->baseShader.limits.temporary = 32;
198                    This->baseShader.limits.constant_bool = 32;
199                    This->baseShader.limits.constant_int = 32;
200                    This->baseShader.limits.address = 1;
201                    This->baseShader.limits.packed_output = 12;
202                    This->baseShader.limits.sampler = 4;
203                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
204                    break;
205
206           default: This->baseShader.limits.temporary = 12;
207                    This->baseShader.limits.constant_bool = 16;
208                    This->baseShader.limits.constant_int = 16;
209                    This->baseShader.limits.address = 1;
210                    This->baseShader.limits.packed_output = 0;
211                    This->baseShader.limits.sampler = 0;
212                    This->baseShader.limits.label = 16;
213                    FIXME("Unrecognized vertex shader version %#x\n",
214                        This->baseShader.hex_version);
215       }
216 }
217
218 /* This is an internal function,
219  * used to create fake semantics for shaders
220  * that don't have them - d3d8 shaders where the declaration
221  * stores the register for each input
222  */
223 static void vshader_set_input(
224     IWineD3DVertexShaderImpl* This,
225     unsigned int regnum,
226     BYTE usage, BYTE usage_idx) {
227
228     /* Fake usage: set reserved bit, usage, usage_idx */
229     DWORD usage_token = (0x1 << 31) |
230         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
231
232     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
233     DWORD reg_token = (0x1 << 31) |
234         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
235
236     This->semantics_in[regnum].usage = usage_token;
237     This->semantics_in[regnum].reg = reg_token;
238 }
239
240 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
241     if (usage_idx1 != usage_idx2) return FALSE;
242     if (usage1 == usage2) return TRUE;
243     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
244     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
245
246     return FALSE;
247 }
248
249 BOOL vshader_get_input(
250     IWineD3DVertexShader* iface,
251     BYTE usage_req, BYTE usage_idx_req,
252     unsigned int* regnum) {
253
254     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
255     int i;
256
257     for (i = 0; i < MAX_ATTRIBS; i++) {
258         DWORD usage_token = This->semantics_in[i].usage;
259         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
260         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
261
262         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
263             *regnum = i;
264             return TRUE;
265         }
266     }
267     return FALSE;
268 }
269
270 BOOL vshader_input_is_color(
271     IWineD3DVertexShader* iface,
272     unsigned int regnum) {
273
274     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
275     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
276     IWineD3DVertexDeclarationImpl *vertexDeclaration = (IWineD3DVertexDeclarationImpl *)deviceImpl->stateBlock->vertexDecl;
277
278     DWORD usage_token = This->semantics_in[regnum].usage;
279     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
280     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
281
282     if (vertexDeclaration) {
283         int i;
284         /* Find the declaration element that matches our register, then check
285          * if it has D3DCOLOR as it's type. This works for both d3d8 and d3d9. */
286         for (i = 0; i < vertexDeclaration->declarationWNumElements-1; ++i) {
287             WINED3DVERTEXELEMENT *element = vertexDeclaration->pDeclarationWine + i;
288             if (match_usage(element->Usage, element->UsageIndex, usage, usage_idx)) {
289                 return element->Type == WINED3DDECLTYPE_D3DCOLOR;
290             }
291         }
292     }
293
294     ERR("Either no vertexdeclaration present, or register not matched. This should never happen.\n");
295     return FALSE;
296 }
297
298 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
299     or GLSL and send it to the card */
300 static VOID IWineD3DVertexShaderImpl_GenerateShader(
301     IWineD3DVertexShader *iface,
302     shader_reg_maps* reg_maps,
303     CONST DWORD *pFunction) {
304
305     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
306     SHADER_BUFFER buffer;
307
308 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
309         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
310     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
311         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
312         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
313         This->fixupVertexBufferSize = PGMSIZE;
314         This->fixupVertexBuffer[0] = 0;
315     }
316     buffer.buffer = This->device->fixupVertexBuffer;
317 #else
318     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
319 #endif
320     buffer.bsize = 0;
321     buffer.lineNo = 0;
322     buffer.newline = TRUE;
323
324     if (This->baseShader.shader_mode == SHADER_GLSL) {
325
326         /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
327         GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
328
329         /* Base Declarations */
330         shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
331
332         /* Base Shader Body */
333         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
334
335         /* Unpack 3.0 outputs */
336         if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0))
337             vshader_glsl_output_unpack(&buffer, This->semantics_out);
338
339         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
340         if (!reg_maps->fog)
341             shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
342         
343         /* Write the final position.
344          *
345          * OpenGL coordinates specify the center of the pixel while d3d coords specify
346          * the corner. The offsets are stored in z and w in the 2nd row of the projection
347          * matrix to avoid wasting a free shader constant. Add them to the w and z coord
348          * of the 2nd row
349          */
350         shader_addline(&buffer, "gl_Position.x = gl_Position.x + posFixup[2];\n");
351         shader_addline(&buffer, "gl_Position.y = gl_Position.y + posFixup[3];\n");
352         /* Account for any inverted textures (render to texture case) by reversing the y coordinate
353          *  (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices)
354          */
355         shader_addline(&buffer, "gl_Position.y = gl_Position.y * posFixup[1];\n");
356
357         shader_addline(&buffer, "}\n");
358
359         TRACE("Compiling shader object %u\n", shader_obj);
360         GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
361         GL_EXTCALL(glCompileShaderARB(shader_obj));
362         print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
363
364         /* Store the shader object */
365         This->baseShader.prgId = shader_obj;
366
367     } else if (This->baseShader.shader_mode == SHADER_ARB) {
368
369         /*  Create the hw ARB shader */
370         shader_addline(&buffer, "!!ARBvp1.0\n");
371
372         /* Mesa supports only 95 constants */
373         if (GL_VEND(MESA) || GL_VEND(WINE))
374             This->baseShader.limits.constant_float = 
375                 min(95, This->baseShader.limits.constant_float);
376
377         /* Base Declarations */
378         shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
379
380         /* We need a constant to fixup the final position */
381         shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
382
383         /* Base Shader Body */
384         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
385
386         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
387         if (!reg_maps->fog)
388             shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
389
390         /* Write the final position.
391          *
392          * OpenGL coordinates specify the center of the pixel while d3d coords specify
393          * the corner. The offsets are stored in the 2nd row of the projection matrix,
394          * the x offset in z and the y offset in w. Add them to the resulting position
395          */
396         shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, posFixup.z;\n");
397         shader_addline(&buffer, "ADD TMP_OUT.y, TMP_OUT.y, posFixup.w;\n");
398         /* Account for any inverted textures (render to texture case) by reversing the y coordinate
399          *  (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices)
400          */
401         shader_addline(&buffer, "MUL TMP_OUT.y, TMP_OUT.y, posFixup.y;\n");
402
403         shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
404         
405         shader_addline(&buffer, "END\n"); 
406
407         /* TODO: change to resource.glObjectHandle or something like that */
408         GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
409
410         TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
411         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
412
413         TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
414         /* Create the program and check for errors */
415         GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
416             buffer.bsize, buffer.buffer));
417
418         if (glGetError() == GL_INVALID_OPERATION) {
419             GLint errPos;
420             glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
421             FIXME("HW VertexShader Error at position %d: %s\n",
422                   errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
423             This->baseShader.prgId = -1;
424         }
425     }
426
427 #if 1 /* if were using the data buffer of device then we don't need to free it */
428   HeapFree(GetProcessHeap(), 0, buffer.buffer);
429 #endif
430 }
431
432 /* *******************************************
433    IWineD3DVertexShader IUnknown parts follow
434    ******************************************* */
435 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
436 {
437     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
438     TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
439     if (IsEqualGUID(riid, &IID_IUnknown) 
440         || IsEqualGUID(riid, &IID_IWineD3DBase)
441         || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
442         || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
443         IUnknown_AddRef(iface);
444         *ppobj = This;
445         return S_OK;
446     }
447     *ppobj = NULL;
448     return E_NOINTERFACE;
449 }
450
451 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
452     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
453     TRACE("(%p) : AddRef increasing from %d\n", This, This->ref);
454     return InterlockedIncrement(&This->ref);
455 }
456
457 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
458     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
459     ULONG ref;
460     TRACE("(%p) : Releasing from %d\n", This, This->ref);
461     ref = InterlockedDecrement(&This->ref);
462     if (ref == 0) {
463         if(iface == ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexShader) {
464             /* See comment in PixelShader::Release */
465             IWineD3DDeviceImpl_MarkStateDirty((IWineD3DDeviceImpl *) This->baseShader.device, STATE_VSHADER);
466         }
467
468         if (This->baseShader.shader_mode == SHADER_GLSL && This->baseShader.prgId != 0) {
469             struct list *linked_programs = &This->baseShader.linked_programs;
470
471             TRACE("Deleting linked programs\n");
472             if (linked_programs->next) {
473                 struct glsl_shader_prog_link *entry, *entry2;
474                 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
475                     delete_glsl_program_entry(This->baseShader.device, entry);
476                 }
477             }
478
479             TRACE("Deleting shader object %u\n", This->baseShader.prgId);
480             GL_EXTCALL(glDeleteObjectARB(This->baseShader.prgId));
481             checkGLcall("glDeleteObjectARB");
482         }
483         shader_delete_constant_list(&This->baseShader.constantsF);
484         shader_delete_constant_list(&This->baseShader.constantsB);
485         shader_delete_constant_list(&This->baseShader.constantsI);
486         HeapFree(GetProcessHeap(), 0, This);
487
488     }
489     return ref;
490 }
491
492 /* *******************************************
493    IWineD3DVertexShader IWineD3DVertexShader parts follow
494    ******************************************* */
495
496 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
497     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
498     
499     *parent = This->parent;
500     IUnknown_AddRef(*parent);
501     TRACE("(%p) : returning %p\n", This, *parent);
502     return WINED3D_OK;
503 }
504
505 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
506     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
507     IWineD3DDevice_AddRef(This->baseShader.device);
508     *pDevice = This->baseShader.device;
509     TRACE("(%p) returning %p\n", This, *pDevice);
510     return WINED3D_OK;
511 }
512
513 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
514     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
515     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
516
517     if (NULL == pData) {
518         *pSizeOfData = This->baseShader.functionLength;
519         return WINED3D_OK;
520     }
521     if (*pSizeOfData < This->baseShader.functionLength) {
522         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
523          * than the required size we should write the required size and
524          * return D3DERR_MOREDATA. That's not actually true. */
525         return WINED3DERR_INVALIDCALL;
526     }
527     if (NULL == This->baseShader.function) { /* no function defined */
528         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
529         (*(DWORD **) pData) = NULL;
530     } else {
531         if(This->baseShader.functionLength == 0){
532
533         }
534         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
535         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
536     }
537     return WINED3D_OK;
538 }
539
540 /* Note that for vertex shaders CompileShader isn't called until the
541  * shader is first used. The reason for this is that we need the vertex
542  * declaration the shader will be used with in order to determine if
543  * the data in a register is of type D3DCOLOR, and needs swizzling. */
544 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
545
546     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
547     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
548     HRESULT hr;
549     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
550
551     TRACE("(%p) : pFunction %p\n", iface, pFunction);
552
553     /* First pass: trace shader */
554     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
555     vshader_set_limits(This);
556
557     /* Initialize immediate constant lists */
558     list_init(&This->baseShader.constantsF);
559     list_init(&This->baseShader.constantsB);
560     list_init(&This->baseShader.constantsI);
561
562     /* Second pass: figure out registers used, semantics, etc.. */
563     memset(reg_maps, 0, sizeof(shader_reg_maps));
564     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
565        This->semantics_in, This->semantics_out, pFunction, deviceImpl->stateBlock);
566     if (hr != WINED3D_OK) return hr;
567
568     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
569
570     /* copy the function ... because it will certainly be released by application */
571     if (NULL != pFunction) {
572         void *function;
573
574         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
575         if (!function) return E_OUTOFMEMORY;
576         memcpy(function, pFunction, This->baseShader.functionLength);
577         This->baseShader.function = function;
578     } else {
579         This->baseShader.function = NULL;
580     }
581
582     return WINED3D_OK;
583 }
584
585 /* Preload semantics for d3d8 shaders */
586 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
587     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
588     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
589
590     int i;
591     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
592         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
593         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
594     }
595 }
596
597 /* Set local constants for d3d8 shaders */
598 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
599         UINT start_idx, const float *src_data, UINT count) {
600     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
601     UINT i, end_idx;
602
603     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
604
605     end_idx = start_idx + count;
606     if (end_idx > GL_LIMITS(vshader_constantsF)) {
607         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
608         end_idx = GL_LIMITS(vshader_constantsF);
609     }
610
611     for (i = start_idx; i < end_idx; ++i) {
612         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
613         if (!lconst) return E_OUTOFMEMORY;
614
615         lconst->idx = i;
616         CopyMemory(lconst->value, src_data + i * 4, 4 * sizeof(float));
617         list_add_head(&This->baseShader.constantsF, &lconst->entry);
618     }
619
620     return WINED3D_OK;
621 }
622
623 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
624     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
625     CONST DWORD *function = This->baseShader.function;
626
627     TRACE("(%p) : function %p\n", iface, function);
628
629     /* We're already compiled. */
630     if (This->baseShader.is_compiled) return WINED3D_OK;
631
632     /* We don't need to compile */
633     if (!function) {
634         This->baseShader.is_compiled = TRUE;
635         return WINED3D_OK;
636     }
637
638     /* Generate the HW shader */
639     TRACE("(%p) : Generating hardware program\n", This);
640     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
641
642     This->baseShader.is_compiled = TRUE;
643
644     return WINED3D_OK;
645 }
646
/* Method table for IWineD3DVertexShader objects. The entries are positional
 * initializers, so their order has to follow the member order of
 * IWineD3DVertexShaderVtbl (declared outside this file). */
const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
{
    /*** IUnknown methods ***/
    IWineD3DVertexShaderImpl_QueryInterface,
    IWineD3DVertexShaderImpl_AddRef,
    IWineD3DVertexShaderImpl_Release,
    /*** IWineD3DBase methods ***/
    IWineD3DVertexShaderImpl_GetParent,
    /*** IWineD3DBaseShader methods ***/
    IWineD3DVertexShaderImpl_SetFunction,
    IWineD3DVertexShaderImpl_CompileShader,
    /*** IWineD3DVertexShader methods ***/
    IWineD3DVertexShaderImpl_GetDevice,
    IWineD3DVertexShaderImpl_GetFunction,
    IWineD3DVertexShaderImpl_FakeSemantics,
    /* The "IWIne" capitalization matches the function's definition in this file */
    IWIneD3DVertexShaderImpl_SetLocalConstantsF
};