kernel32: GlobalMemoryStatusEx: return the size of physical memory + swapsize in...
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24
25 #include "config.h"
26
27 #include <math.h>
28 #include <stdio.h>
29
30 #include "wined3d_private.h"
31
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
33
34 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->baseShader.device)->wineD3D))->gl_info
35
36 /* Shader debugging - Change the following line to enable debugging of software
37       vertex shaders                                                             */
38 #if 0 /* Must not be 1 in cvs version */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
41 #else
42 # define VSTRACE(A)
43 # define TRACE_VSVECTOR(name)
44 #endif
45
46 /**
47  * DirectX9 SDK download
48  *  http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
49  *
50  * Exploring D3DX
51  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
52  *
53  * Using Vertex Shaders
54  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
55  *
56  * Dx9 New
57  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
58  *
59  * Dx9 Shaders
60  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
64  *
65  * Dx9 D3DX
66  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
67  *
68  * FVF
69  *  http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
70  *
71  * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72  *  http://developer.nvidia.com/view.asp?IO=vstovp
73  *
74  * NVIDIA: Memory Management with VAR
75  *  http://developer.nvidia.com/view.asp?IO=var_memory_management
76  */
77
78 /* TODO: Vertex and Pixel shaders are almost identical, the only exception being the way that some of the data is looked up or the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders.
79 Because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross compiled using a large body of shared code */
80
81 #define GLNAME_REQUIRE_GLSL  ((const char *)1)
82
83 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
84     /* This table is not order or position dependent. */
85
86     /* Arithmetic */
87     {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, vshader_hw_map2gl,   NULL, 0, 0},
88     {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, vshader_hw_map2gl,   shader_glsl_mov, 0, 0},
89     {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, vshader_hw_map2gl,   shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
90     {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
91     {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
92     {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, vshader_hw_map2gl,   shader_glsl_mad, 0, 0},
93     {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
94     {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
95     {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_map2gl, 0, 0},
96     {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
97     {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
98     {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
99     {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
100     {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
101     {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
102     {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
103     {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
104     {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
105     {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, vshader_hw_map2gl,   shader_glsl_expp, 0, 0},
106     {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
107     {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, vshader_hw_map2gl,   shader_glsl_lit, 0, 0},
108     {WINED3DSIO_DST,    "dst",  "DST", 1, 3, vshader_hw_map2gl,   shader_glsl_dst, 0, 0},
109     {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
110     {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
111     {WINED3DSIO_POW,    "pow",  "POW", 1, 3, NULL,                shader_glsl_map2gl, 0, 0},
112     {WINED3DSIO_CRS,    "crs",  "XPS", 1, 3, NULL,                shader_glsl_cross, 0, 0},
113     /* TODO: sng can possibly be performed a  s
114         RCP tmp, vec
115         MUL out, tmp, vec*/
116     {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
117     /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
118         DP3 tmp , vec, vec;
119         RSQ tmp, tmp.x;
120         MUL vec.xyz, vec, tmp;
121     but I think this is better because it accounts for w properly.
122         DP3 tmp , vec, vec;
123         RSQ tmp, tmp.x;
124         MUL vec, vec, tmp;
125     */
126     {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
127     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, NULL, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
128     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 2, NULL, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
129     /* Matrix */
130     {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
131     {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
132     {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
133     {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
134     {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
135     /* Declare registers */
136     {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
137     /* Constant definitions */
138     {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
139     {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
140     {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
141     /* Flow control - requires GLSL or software shaders */
142     {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
143     {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
144     {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
145     {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
146     {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
147     {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
148     {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
149     {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
150     {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
151     {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
152     {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
153     {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
154     {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
155     {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
156     {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},
157
158     {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
159     {WINED3DSIO_TEXLDL, "texdl",    GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
160     {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
161 };
162
163 static void vshader_set_limits(
164       IWineD3DVertexShaderImpl *This) {
165
166       This->baseShader.limits.texcoord = 0;
167       This->baseShader.limits.attributes = 16;
168       This->baseShader.limits.packed_input = 0;
169
170       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
171       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
172
173       switch (This->baseShader.hex_version) {
174           case WINED3DVS_VERSION(1,0):
175           case WINED3DVS_VERSION(1,1):
176                    This->baseShader.limits.temporary = 12;
177                    This->baseShader.limits.constant_bool = 0;
178                    This->baseShader.limits.constant_int = 0;
179                    This->baseShader.limits.address = 1;
180                    This->baseShader.limits.packed_output = 0;
181                    This->baseShader.limits.sampler = 0;
182                    This->baseShader.limits.label = 0;
183                    break;
184       
185           case WINED3DVS_VERSION(2,0):
186           case WINED3DVS_VERSION(2,1):
187                    This->baseShader.limits.temporary = 12;
188                    This->baseShader.limits.constant_bool = 16;
189                    This->baseShader.limits.constant_int = 16;
190                    This->baseShader.limits.address = 1;
191                    This->baseShader.limits.packed_output = 0;
192                    This->baseShader.limits.sampler = 0;
193                    This->baseShader.limits.label = 16;
194                    break;
195
196           case WINED3DVS_VERSION(3,0):
197                    This->baseShader.limits.temporary = 32;
198                    This->baseShader.limits.constant_bool = 32;
199                    This->baseShader.limits.constant_int = 32;
200                    This->baseShader.limits.address = 1;
201                    This->baseShader.limits.packed_output = 12;
202                    This->baseShader.limits.sampler = 4;
203                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
204                    break;
205
206           default: This->baseShader.limits.temporary = 12;
207                    This->baseShader.limits.constant_bool = 16;
208                    This->baseShader.limits.constant_int = 16;
209                    This->baseShader.limits.address = 1;
210                    This->baseShader.limits.packed_output = 0;
211                    This->baseShader.limits.sampler = 0;
212                    This->baseShader.limits.label = 16;
213                    FIXME("Unrecognized vertex shader version %#x\n",
214                        This->baseShader.hex_version);
215       }
216 }
217
218 /* This is an internal function,
219  * used to create fake semantics for shaders
220  * that don't have them - d3d8 shaders where the declaration
221  * stores the register for each input
222  */
223 static void vshader_set_input(
224     IWineD3DVertexShaderImpl* This,
225     unsigned int regnum,
226     BYTE usage, BYTE usage_idx) {
227
228     /* Fake usage: set reserved bit, usage, usage_idx */
229     DWORD usage_token = (0x1 << 31) |
230         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
231
232     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
233     DWORD reg_token = (0x1 << 31) |
234         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
235
236     This->semantics_in[regnum].usage = usage_token;
237     This->semantics_in[regnum].reg = reg_token;
238 }
239
240 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
241     if (usage_idx1 != usage_idx2) return FALSE;
242     if (usage1 == usage2) return TRUE;
243     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
244     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
245
246     return FALSE;
247 }
248
249 BOOL vshader_get_input(
250     IWineD3DVertexShader* iface,
251     BYTE usage_req, BYTE usage_idx_req,
252     unsigned int* regnum) {
253
254     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
255     int i;
256
257     for (i = 0; i < MAX_ATTRIBS; i++) {
258         DWORD usage_token = This->semantics_in[i].usage;
259         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
260         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
261
262         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
263             *regnum = i;
264             return TRUE;
265         }
266     }
267     return FALSE;
268 }
269
270 BOOL vshader_input_is_color(
271     IWineD3DVertexShader* iface,
272     unsigned int regnum) {
273
274     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
275     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
276     IWineD3DVertexDeclarationImpl *vertexDeclaration = (IWineD3DVertexDeclarationImpl *)deviceImpl->stateBlock->vertexDecl;
277
278     DWORD usage_token = This->semantics_in[regnum].usage;
279     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
280     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
281
282     if (vertexDeclaration) {
283         int i;
284         /* Find the declaration element that matches our register, then check
285          * if it has D3DCOLOR as it's type. This works for both d3d8 and d3d9. */
286         for (i = 0; i < vertexDeclaration->declarationWNumElements-1; ++i) {
287             WINED3DVERTEXELEMENT *element = vertexDeclaration->pDeclarationWine + i;
288             if (match_usage(element->Usage, element->UsageIndex, usage, usage_idx)) {
289                 return element->Type == WINED3DDECLTYPE_D3DCOLOR;
290             }
291         }
292     }
293
294     ERR("Either no vertexdeclaration present, or register not matched. This should never happen.\n");
295     return FALSE;
296 }
297
298 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
299     or GLSL and send it to the card */
300 static VOID IWineD3DVertexShaderImpl_GenerateShader(
301     IWineD3DVertexShader *iface,
302     shader_reg_maps* reg_maps,
303     CONST DWORD *pFunction) {
304
305     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
306     SHADER_BUFFER buffer;
307
308 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
309         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
310     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
311         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
312         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
313         This->fixupVertexBufferSize = PGMSIZE;
314         This->fixupVertexBuffer[0] = 0;
315     }
316     buffer.buffer = This->device->fixupVertexBuffer;
317 #else
318     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
319 #endif
320     buffer.bsize = 0;
321     buffer.lineNo = 0;
322     buffer.newline = TRUE;
323
324     if (This->baseShader.shader_mode == SHADER_GLSL) {
325
326         /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
327         GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
328
329         /* Base Declarations */
330         shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
331
332         /* Base Shader Body */
333         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
334
335         /* Unpack 3.0 outputs */
336         if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0))
337             vshader_glsl_output_unpack(&buffer, This->semantics_out);
338
339         /* Clamp the fog from 0 to 1 if it's used */
340         if (reg_maps->fog) {
341             This->usesFog = 1;
342             shader_addline(&buffer, "gl_FogFragCoord = clamp(gl_FogFragCoord, 0.0, 1.0);\n");
343         }
344         
345         /* Write the final position.
346          *
347          * OpenGL coordinates specify the center of the pixel while d3d coords specify
348          * the corner. The offsets are stored in z and w in the 2nd row of the projection
349          * matrix to avoid wasting a free shader constant. Add them to the w and z coord
350          * of the 2nd row
351          */
352         shader_addline(&buffer, "gl_Position.x = gl_Position.x + posFixup[2];\n");
353         shader_addline(&buffer, "gl_Position.y = gl_Position.y + posFixup[3];\n");
354         /* Account for any inverted textures (render to texture case) by reversing the y coordinate
355          *  (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices)
356          */
357         shader_addline(&buffer, "gl_Position.y = gl_Position.y * posFixup[1];\n");
358
359         shader_addline(&buffer, "}\n");
360
361         TRACE("Compiling shader object %u\n", shader_obj);
362         GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
363         GL_EXTCALL(glCompileShaderARB(shader_obj));
364         print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
365
366         /* Store the shader object */
367         This->baseShader.prgId = shader_obj;
368
369     } else if (This->baseShader.shader_mode == SHADER_ARB) {
370
371         /*  Create the hw ARB shader */
372         shader_addline(&buffer, "!!ARBvp1.0\n");
373
374         /* Mesa supports only 95 constants */
375         if (GL_VEND(MESA) || GL_VEND(WINE))
376             This->baseShader.limits.constant_float = 
377                 min(95, This->baseShader.limits.constant_float);
378
379         /* Base Declarations */
380         shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
381
382         /* We need a constant to fixup the final position */
383         shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
384
385         if (reg_maps->fog) {
386             This->usesFog = 1;
387             shader_addline(&buffer, "TEMP TMP_FOG;\n");
388         }
389
390         /* Base Shader Body */
391         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
392
393         /* Make sure the fog value is positive - values above 1.0 are ignored */
394         if (reg_maps->fog)
395             shader_addline(&buffer, "MAX result.fogcoord, TMP_FOG, 0.0;\n");
396
397         /* Write the final position.
398          *
399          * OpenGL coordinates specify the center of the pixel while d3d coords specify
400          * the corner. The offsets are stored in the 2nd row of the projection matrix,
401          * the x offset in z and the y offset in w. Add them to the resulting position
402          */
403         shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, posFixup.z;\n");
404         shader_addline(&buffer, "ADD TMP_OUT.y, TMP_OUT.y, posFixup.w;\n");
405         /* Account for any inverted textures (render to texture case) by reversing the y coordinate
406          *  (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices)
407          */
408         shader_addline(&buffer, "MUL TMP_OUT.y, TMP_OUT.y, posFixup.y;\n");
409
410         shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
411         
412         shader_addline(&buffer, "END\n"); 
413
414         /* TODO: change to resource.glObjectHandle or something like that */
415         GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
416
417         TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
418         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
419
420         TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
421         /* Create the program and check for errors */
422         GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
423             buffer.bsize, buffer.buffer));
424
425         if (glGetError() == GL_INVALID_OPERATION) {
426             GLint errPos;
427             glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
428             FIXME("HW VertexShader Error at position %d: %s\n",
429                   errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
430             This->baseShader.prgId = -1;
431         }
432     }
433
434 #if 1 /* if were using the data buffer of device then we don't need to free it */
435   HeapFree(GetProcessHeap(), 0, buffer.buffer);
436 #endif
437 }
438
439 /* *******************************************
440    IWineD3DVertexShader IUnknown parts follow
441    ******************************************* */
442 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
443 {
444     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
445     TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
446     if (IsEqualGUID(riid, &IID_IUnknown) 
447         || IsEqualGUID(riid, &IID_IWineD3DBase)
448         || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
449         || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
450         IUnknown_AddRef(iface);
451         *ppobj = This;
452         return S_OK;
453     }
454     *ppobj = NULL;
455     return E_NOINTERFACE;
456 }
457
458 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
459     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
460     TRACE("(%p) : AddRef increasing from %d\n", This, This->ref);
461     return InterlockedIncrement(&This->ref);
462 }
463
464 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
465     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
466     ULONG ref;
467     TRACE("(%p) : Releasing from %d\n", This, This->ref);
468     ref = InterlockedDecrement(&This->ref);
469     if (ref == 0) {
470         if (This->baseShader.shader_mode == SHADER_GLSL && This->baseShader.prgId != 0) {
471             struct list *linked_programs = &This->baseShader.linked_programs;
472
473             TRACE("Deleting linked programs\n");
474             if (linked_programs->next) {
475                 struct glsl_shader_prog_link *entry, *entry2;
476                 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
477                     delete_glsl_program_entry(This->baseShader.device, entry);
478                 }
479             }
480
481             TRACE("Deleting shader object %u\n", This->baseShader.prgId);
482             GL_EXTCALL(glDeleteObjectARB(This->baseShader.prgId));
483             checkGLcall("glDeleteObjectARB");
484         }
485         shader_delete_constant_list(&This->baseShader.constantsF);
486         shader_delete_constant_list(&This->baseShader.constantsB);
487         shader_delete_constant_list(&This->baseShader.constantsI);
488         HeapFree(GetProcessHeap(), 0, This);
489
490     }
491     return ref;
492 }
493
494 /* *******************************************
495    IWineD3DVertexShader IWineD3DVertexShader parts follow
496    ******************************************* */
497
498 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
499     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
500     
501     *parent = This->parent;
502     IUnknown_AddRef(*parent);
503     TRACE("(%p) : returning %p\n", This, *parent);
504     return WINED3D_OK;
505 }
506
507 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
508     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
509     IWineD3DDevice_AddRef(This->baseShader.device);
510     *pDevice = This->baseShader.device;
511     TRACE("(%p) returning %p\n", This, *pDevice);
512     return WINED3D_OK;
513 }
514
515 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
516     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
517     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
518
519     if (NULL == pData) {
520         *pSizeOfData = This->baseShader.functionLength;
521         return WINED3D_OK;
522     }
523     if (*pSizeOfData < This->baseShader.functionLength) {
524         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
525          * than the required size we should write the required size and
526          * return D3DERR_MOREDATA. That's not actually true. */
527         return WINED3DERR_INVALIDCALL;
528     }
529     if (NULL == This->baseShader.function) { /* no function defined */
530         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
531         (*(DWORD **) pData) = NULL;
532     } else {
533         if(This->baseShader.functionLength == 0){
534
535         }
536         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
537         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
538     }
539     return WINED3D_OK;
540 }
541
542 /* Note that for vertex shaders CompileShader isn't called until the
543  * shader is first used. The reason for this is that we need the vertex
544  * declaration the shader will be used with in order to determine if
545  * the data in a register is of type D3DCOLOR, and needs swizzling. */
546 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
547
548     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
549     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
550     HRESULT hr;
551     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
552
553     TRACE("(%p) : pFunction %p\n", iface, pFunction);
554
555     /* First pass: trace shader */
556     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
557     vshader_set_limits(This);
558
559     /* Initialize immediate constant lists */
560     list_init(&This->baseShader.constantsF);
561     list_init(&This->baseShader.constantsB);
562     list_init(&This->baseShader.constantsI);
563
564     /* Second pass: figure out registers used, semantics, etc.. */
565     memset(reg_maps, 0, sizeof(shader_reg_maps));
566     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
567        This->semantics_in, This->semantics_out, pFunction, deviceImpl->stateBlock);
568     if (hr != WINED3D_OK) return hr;
569
570     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
571
572     /* copy the function ... because it will certainly be released by application */
573     if (NULL != pFunction) {
574         void *function;
575
576         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
577         if (!function) return E_OUTOFMEMORY;
578         memcpy(function, pFunction, This->baseShader.functionLength);
579         This->baseShader.function = function;
580     } else {
581         This->baseShader.function = NULL;
582     }
583
584     return WINED3D_OK;
585 }
586
587 /* Preload semantics for d3d8 shaders */
588 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
589     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
590     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
591
592     int i;
593     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
594         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
595         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
596     }
597 }
598
599 /* Set local constants for d3d8 shaders */
600 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
601         UINT start_idx, const float *src_data, UINT count) {
602     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
603     UINT i, end_idx;
604
605     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
606
607     end_idx = start_idx + count;
608     if (end_idx > GL_LIMITS(vshader_constantsF)) {
609         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
610         end_idx = GL_LIMITS(vshader_constantsF);
611     }
612
613     for (i = start_idx; i < end_idx; ++i) {
614         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
615         if (!lconst) return E_OUTOFMEMORY;
616
617         lconst->idx = i;
618         CopyMemory(lconst->value, src_data + i * 4, 4 * sizeof(float));
619         list_add_head(&This->baseShader.constantsF, &lconst->entry);
620     }
621
622     return WINED3D_OK;
623 }
624
625 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
626     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
627     CONST DWORD *function = This->baseShader.function;
628
629     TRACE("(%p) : function %p\n", iface, function);
630
631     /* We're already compiled. */
632     if (This->baseShader.is_compiled) return WINED3D_OK;
633
634     /* We don't need to compile */
635     if (!function) {
636         This->baseShader.is_compiled = TRUE;
637         return WINED3D_OK;
638     }
639
640     /* Generate the HW shader */
641     TRACE("(%p) : Generating hardware program\n", This);
642     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
643
644     This->baseShader.is_compiled = TRUE;
645
646     return WINED3D_OK;
647 }
648
/* Method table for the vertex shader object.
 * This is a positional initializer, so the entries must stay in the member
 * order of IWineD3DVertexShaderVtbl (declared outside this file). */
const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
{
    /*** IUnknown methods ***/
    IWineD3DVertexShaderImpl_QueryInterface,
    IWineD3DVertexShaderImpl_AddRef,
    IWineD3DVertexShaderImpl_Release,
    /*** IWineD3DBase methods ***/
    IWineD3DVertexShaderImpl_GetParent,
    /*** IWineD3DBaseShader methods ***/
    IWineD3DVertexShaderImpl_SetFunction,
    IWineD3DVertexShaderImpl_CompileShader,
    /*** IWineD3DVertexShader methods ***/
    IWineD3DVertexShaderImpl_GetDevice,
    IWineD3DVertexShaderImpl_GetFunction,
    IWineD3DVertexShaderImpl_FakeSemantics,
    /* Spelled "IWIne..." to match the function's definition in this file */
    IWIneD3DVertexShaderImpl_SetLocalConstantsF
};