comctl32: We can now store binary files in the repository.
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24
25 #include "config.h"
26
27 #include <math.h>
28 #include <stdio.h>
29
30 #include "wined3d_private.h"
31
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
33
/* Shorthand for the gl_info of the adapter behind the shader's device.
 * The expansion refers to a local variable named `This`, so it can only be
 * used where such a variable is in scope. */
#define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info

/* Debug switch for software vertex shaders: change the condition below to 1
 * to make VSTRACE() and TRACE_VSVECTOR() emit TRACE output.
 * Must not be 1 in the committed (cvs) version. */
#if 0
#define VSTRACE(A) TRACE A
#define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
#else
#define VSTRACE(A)
#define TRACE_VSVECTOR(name)
#endif
45
46 /**
47  * DirectX9 SDK download
48  *  http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
49  *
50  * Exploring D3DX
51  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
52  *
53  * Using Vertex Shaders
54  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
55  *
56  * Dx9 New
57  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
58  *
59  * Dx9 Shaders
60  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
64  *
65  * Dx9 D3DX
66  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
67  *
68  * FVF
69  *  http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
70  *
71  * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72  *  http://developer.nvidia.com/view.asp?IO=vstovp
73  *
74  * NVIDIA: Memory Management with VAR
75  *  http://developer.nvidia.com/view.asp?IO=var_memory_management
76  */
77
/* TODO: Vertex and pixel shaders are almost identical; the only exceptions are the way some of the data is looked up and the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders.
Because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs could be cross compiled using a large body of shared code. */
80
81 #define GLNAME_REQUIRE_GLSL  ((const char *)1)
82
83 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
84     /* This table is not order or position dependent. */
85
86     /* Arithmetic */
87     {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, vshader_hw_map2gl,   NULL, 0, 0},
88     {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, vshader_hw_map2gl,   shader_glsl_mov, 0, 0},
89     {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, vshader_hw_map2gl,   shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
90     {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
91     {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
92     {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, vshader_hw_map2gl,   shader_glsl_mad, 0, 0},
93     {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
94     {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
95     {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rsq, 0, 0},
96     {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
97     {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
98     {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
99     {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
100     {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
101     {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
102     {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
103     {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
104     {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
105     {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, vshader_hw_map2gl,   shader_glsl_expp, 0, 0},
106     {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
107     {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, vshader_hw_map2gl,   shader_glsl_lit, 0, 0},
108     {WINED3DSIO_DST,    "dst",  "DST", 1, 3, vshader_hw_map2gl,   shader_glsl_dst, 0, 0},
109     {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
110     {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
111     {WINED3DSIO_POW,    "pow",  "POW", 1, 3, NULL,                shader_glsl_pow, 0, 0},
112     {WINED3DSIO_CRS,    "crs",  "XPS", 1, 3, NULL,                shader_glsl_cross, 0, 0},
113     /* TODO: sng can possibly be performed a  s
114         RCP tmp, vec
115         MUL out, tmp, vec*/
116     {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
117     /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
118         DP3 tmp , vec, vec;
119         RSQ tmp, tmp.x;
120         MUL vec.xyz, vec, tmp;
121     but I think this is better because it accounts for w properly.
122         DP3 tmp , vec, vec;
123         RSQ tmp, tmp.x;
124         MUL vec, vec, tmp;
125     */
126     {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
127     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, NULL, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
128     {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 2, NULL, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
129     /* Matrix */
130     {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
131     {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
132     {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
133     {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
134     {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
135     /* Declare registers */
136     {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
137     /* Constant definitions */
138     {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
139     {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
140     {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
141     /* Flow control - requires GLSL or software shaders */
142     {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
143     {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
144     {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
145     {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
146     {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
147     {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
148     {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
149     {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
150     {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
151     {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
152     {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
153     {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
154     {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
155     {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
156     {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},
157
158     {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
159     {WINED3DSIO_TEXLDL, "texldl",   NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
160     {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
161 };
162
163 static void vshader_set_limits(
164       IWineD3DVertexShaderImpl *This) {
165
166       This->baseShader.limits.texcoord = 0;
167       This->baseShader.limits.attributes = 16;
168       This->baseShader.limits.packed_input = 0;
169
170       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
171       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
172
173       switch (This->baseShader.hex_version) {
174           case WINED3DVS_VERSION(1,0):
175           case WINED3DVS_VERSION(1,1):
176                    This->baseShader.limits.temporary = 12;
177                    This->baseShader.limits.constant_bool = 0;
178                    This->baseShader.limits.constant_int = 0;
179                    This->baseShader.limits.address = 1;
180                    This->baseShader.limits.packed_output = 0;
181                    This->baseShader.limits.sampler = 0;
182                    This->baseShader.limits.label = 0;
183                    break;
184       
185           case WINED3DVS_VERSION(2,0):
186           case WINED3DVS_VERSION(2,1):
187                    This->baseShader.limits.temporary = 12;
188                    This->baseShader.limits.constant_bool = 16;
189                    This->baseShader.limits.constant_int = 16;
190                    This->baseShader.limits.address = 1;
191                    This->baseShader.limits.packed_output = 0;
192                    This->baseShader.limits.sampler = 0;
193                    This->baseShader.limits.label = 16;
194                    break;
195
196           case WINED3DVS_VERSION(3,0):
197                    This->baseShader.limits.temporary = 32;
198                    This->baseShader.limits.constant_bool = 32;
199                    This->baseShader.limits.constant_int = 32;
200                    This->baseShader.limits.address = 1;
201                    This->baseShader.limits.packed_output = 12;
202                    This->baseShader.limits.sampler = 4;
203                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
204                    break;
205
206           default: This->baseShader.limits.temporary = 12;
207                    This->baseShader.limits.constant_bool = 16;
208                    This->baseShader.limits.constant_int = 16;
209                    This->baseShader.limits.address = 1;
210                    This->baseShader.limits.packed_output = 0;
211                    This->baseShader.limits.sampler = 0;
212                    This->baseShader.limits.label = 16;
213                    FIXME("Unrecognized vertex shader version %#x\n",
214                        This->baseShader.hex_version);
215       }
216 }
217
218 /* This is an internal function,
219  * used to create fake semantics for shaders
220  * that don't have them - d3d8 shaders where the declaration
221  * stores the register for each input
222  */
223 static void vshader_set_input(
224     IWineD3DVertexShaderImpl* This,
225     unsigned int regnum,
226     BYTE usage, BYTE usage_idx) {
227
228     /* Fake usage: set reserved bit, usage, usage_idx */
229     DWORD usage_token = (0x1 << 31) |
230         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
231
232     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
233     DWORD reg_token = (0x1 << 31) |
234         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
235
236     This->semantics_in[regnum].usage = usage_token;
237     This->semantics_in[regnum].reg = reg_token;
238 }
239
240 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
241     if (usage_idx1 != usage_idx2) return FALSE;
242     if (usage1 == usage2) return TRUE;
243     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
244     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
245
246     return FALSE;
247 }
248
249 BOOL vshader_get_input(
250     IWineD3DVertexShader* iface,
251     BYTE usage_req, BYTE usage_idx_req,
252     unsigned int* regnum) {
253
254     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
255     int i;
256
257     for (i = 0; i < MAX_ATTRIBS; i++) {
258         DWORD usage_token = This->semantics_in[i].usage;
259         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
260         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
261
262         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
263             *regnum = i;
264             return TRUE;
265         }
266     }
267     return FALSE;
268 }
269
270 BOOL vshader_input_is_color(
271     IWineD3DVertexShader* iface,
272     unsigned int regnum) {
273
274     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
275     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
276     IWineD3DVertexDeclarationImpl *vertexDeclaration = (IWineD3DVertexDeclarationImpl *)deviceImpl->stateBlock->vertexDecl;
277
278     DWORD usage_token = This->semantics_in[regnum].usage;
279     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
280     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
281
282     if (vertexDeclaration) {
283         int i;
284         /* Find the declaration element that matches our register, then check
285          * if it has D3DCOLOR as it's type. This works for both d3d8 and d3d9. */
286         for (i = 0; i < vertexDeclaration->declarationWNumElements-1; ++i) {
287             WINED3DVERTEXELEMENT *element = vertexDeclaration->pDeclarationWine + i;
288             if (match_usage(element->Usage, element->UsageIndex, usage, usage_idx)) {
289                 return element->Type == WINED3DDECLTYPE_D3DCOLOR;
290             }
291         }
292     }
293
294     ERR("Either no vertexdeclaration present, or register not matched. This should never happen.\n");
295     return FALSE;
296 }
297
298 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
299     or GLSL and send it to the card */
300 static VOID IWineD3DVertexShaderImpl_GenerateShader(
301     IWineD3DVertexShader *iface,
302     shader_reg_maps* reg_maps,
303     CONST DWORD *pFunction) {
304
305     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
306     SHADER_BUFFER buffer;
307
308 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
309         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
310     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
311         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
312         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
313         This->fixupVertexBufferSize = PGMSIZE;
314         This->fixupVertexBuffer[0] = 0;
315     }
316     buffer.buffer = This->device->fixupVertexBuffer;
317 #else
318     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
319 #endif
320     buffer.bsize = 0;
321     buffer.lineNo = 0;
322     buffer.newline = TRUE;
323
324     if (This->baseShader.shader_mode == SHADER_GLSL) {
325
326         /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
327         GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
328
329         /* Base Declarations */
330         shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
331
332         /* Base Shader Body */
333         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
334
335         /* Unpack 3.0 outputs */
336         if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0))
337             vshader_glsl_output_unpack(&buffer, This->semantics_out);
338
339         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
340         if (!reg_maps->fog)
341             shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
342         
343         /* Write the final position.
344          *
345          * OpenGL coordinates specify the center of the pixel while d3d coords specify
346          * the corner. The offsets are stored in z and w in the 2nd row of the projection
347          * matrix to avoid wasting a free shader constant. Add them to the w and z coord
348          * of the 2nd row
349          */
350         shader_addline(&buffer, "gl_Position.x = gl_Position.x + posFixup[2];\n");
351         shader_addline(&buffer, "gl_Position.y = gl_Position.y + posFixup[3];\n");
352         /* Account for any inverted textures (render to texture case) by reversing the y coordinate
353          *  (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices)
354          */
355         shader_addline(&buffer, "gl_Position.y = gl_Position.y * posFixup[1];\n");
356         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
357          *
358          * Basically we want(in homogenous coordinates) z = z * 2 - 1. However, shaders are run
359          * before the homogenous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
360          * which is the same as z = z / 2 - w.
361          */
362         shader_addline(&buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
363
364         shader_addline(&buffer, "}\n");
365
366         TRACE("Compiling shader object %u\n", shader_obj);
367         GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
368         GL_EXTCALL(glCompileShaderARB(shader_obj));
369         print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
370
371         /* Store the shader object */
372         This->baseShader.prgId = shader_obj;
373
374     } else if (This->baseShader.shader_mode == SHADER_ARB) {
375
376         /*  Create the hw ARB shader */
377         shader_addline(&buffer, "!!ARBvp1.0\n");
378         shader_addline(&buffer, "PARAM zfixup = { 2.0, -1.0, 0.0, 0.0 };\n");
379
380         /* Mesa supports only 95 constants */
381         if (GL_VEND(MESA) || GL_VEND(WINE))
382             This->baseShader.limits.constant_float = 
383                 min(95, This->baseShader.limits.constant_float);
384
385         /* Base Declarations */
386         shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
387
388         /* We need a constant to fixup the final position */
389         shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
390
391         /* Base Shader Body */
392         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
393
394         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
395         if (!reg_maps->fog)
396             shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
397
398         /* Write the final position.
399          *
400          * OpenGL coordinates specify the center of the pixel while d3d coords specify
401          * the corner. The offsets are stored in the 2nd row of the projection matrix,
402          * the x offset in z and the y offset in w. Add them to the resulting position
403          */
404         shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, posFixup.z;\n");
405         shader_addline(&buffer, "ADD TMP_OUT.y, TMP_OUT.y, posFixup.w;\n");
406         /* Account for any inverted textures (render to texture case) by reversing the y coordinate
407          *  (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices)
408          */
409         shader_addline(&buffer, "MUL TMP_OUT.y, TMP_OUT.y, posFixup.y;\n");
410         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
411          * and the glsl equivalent
412          */
413         shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, zfixup.x, -TMP_OUT.w;\n");
414
415         shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
416         
417         shader_addline(&buffer, "END\n"); 
418
419         /* TODO: change to resource.glObjectHandle or something like that */
420         GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
421
422         TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
423         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
424
425         TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
426         /* Create the program and check for errors */
427         GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
428             buffer.bsize, buffer.buffer));
429
430         if (glGetError() == GL_INVALID_OPERATION) {
431             GLint errPos;
432             glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
433             FIXME("HW VertexShader Error at position %d: %s\n",
434                   errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
435             This->baseShader.prgId = -1;
436         }
437     }
438
439 #if 1 /* if were using the data buffer of device then we don't need to free it */
440   HeapFree(GetProcessHeap(), 0, buffer.buffer);
441 #endif
442 }
443
444 /* *******************************************
445    IWineD3DVertexShader IUnknown parts follow
446    ******************************************* */
447 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
448 {
449     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
450     TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
451     if (IsEqualGUID(riid, &IID_IUnknown) 
452         || IsEqualGUID(riid, &IID_IWineD3DBase)
453         || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
454         || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
455         IUnknown_AddRef(iface);
456         *ppobj = This;
457         return S_OK;
458     }
459     *ppobj = NULL;
460     return E_NOINTERFACE;
461 }
462
463 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
464     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
465     TRACE("(%p) : AddRef increasing from %d\n", This, This->ref);
466     return InterlockedIncrement(&This->ref);
467 }
468
469 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
470     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
471     ULONG ref;
472     TRACE("(%p) : Releasing from %d\n", This, This->ref);
473     ref = InterlockedDecrement(&This->ref);
474     if (ref == 0) {
475         if (This->baseShader.shader_mode == SHADER_GLSL && This->baseShader.prgId != 0) {
476             struct list *linked_programs = &This->baseShader.linked_programs;
477
478             TRACE("Deleting linked programs\n");
479             if (linked_programs->next) {
480                 struct glsl_shader_prog_link *entry, *entry2;
481                 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
482                     delete_glsl_program_entry(This->baseShader.device, entry);
483                 }
484             }
485
486             TRACE("Deleting shader object %u\n", This->baseShader.prgId);
487             GL_EXTCALL(glDeleteObjectARB(This->baseShader.prgId));
488             checkGLcall("glDeleteObjectARB");
489         }
490         shader_delete_constant_list(&This->baseShader.constantsF);
491         shader_delete_constant_list(&This->baseShader.constantsB);
492         shader_delete_constant_list(&This->baseShader.constantsI);
493         HeapFree(GetProcessHeap(), 0, This);
494
495     }
496     return ref;
497 }
498
499 /* *******************************************
500    IWineD3DVertexShader IWineD3DVertexShader parts follow
501    ******************************************* */
502
503 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
504     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
505     
506     *parent = This->parent;
507     IUnknown_AddRef(*parent);
508     TRACE("(%p) : returning %p\n", This, *parent);
509     return WINED3D_OK;
510 }
511
512 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
513     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
514     IWineD3DDevice_AddRef(This->baseShader.device);
515     *pDevice = This->baseShader.device;
516     TRACE("(%p) returning %p\n", This, *pDevice);
517     return WINED3D_OK;
518 }
519
520 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
521     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
522     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
523
524     if (NULL == pData) {
525         *pSizeOfData = This->baseShader.functionLength;
526         return WINED3D_OK;
527     }
528     if (*pSizeOfData < This->baseShader.functionLength) {
529         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
530          * than the required size we should write the required size and
531          * return D3DERR_MOREDATA. That's not actually true. */
532         return WINED3DERR_INVALIDCALL;
533     }
534     if (NULL == This->baseShader.function) { /* no function defined */
535         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
536         (*(DWORD **) pData) = NULL;
537     } else {
538         if(This->baseShader.functionLength == 0){
539
540         }
541         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
542         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
543     }
544     return WINED3D_OK;
545 }
546
547 /* Note that for vertex shaders CompileShader isn't called until the
548  * shader is first used. The reason for this is that we need the vertex
549  * declaration the shader will be used with in order to determine if
550  * the data in a register is of type D3DCOLOR, and needs swizzling. */
551 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
552
553     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
554     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
555     HRESULT hr;
556     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
557
558     TRACE("(%p) : pFunction %p\n", iface, pFunction);
559
560     /* First pass: trace shader */
561     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
562     vshader_set_limits(This);
563
564     /* Initialize immediate constant lists */
565     list_init(&This->baseShader.constantsF);
566     list_init(&This->baseShader.constantsB);
567     list_init(&This->baseShader.constantsI);
568
569     /* Second pass: figure out registers used, semantics, etc.. */
570     memset(reg_maps, 0, sizeof(shader_reg_maps));
571     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
572        This->semantics_in, This->semantics_out, pFunction, NULL);
573     if (hr != WINED3D_OK) return hr;
574
575     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
576
577     /* copy the function ... because it will certainly be released by application */
578     if (NULL != pFunction) {
579         void *function;
580
581         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
582         if (!function) return E_OUTOFMEMORY;
583         memcpy(function, pFunction, This->baseShader.functionLength);
584         This->baseShader.function = function;
585     } else {
586         This->baseShader.function = NULL;
587     }
588
589     return WINED3D_OK;
590 }
591
592 /* Preload semantics for d3d8 shaders */
593 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
594     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
595     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
596
597     int i;
598     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
599         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
600         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
601     }
602 }
603
604 /* Set local constants for d3d8 shaders */
605 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
606         UINT start_idx, const float *src_data, UINT count) {
607     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
608     UINT i, end_idx;
609
610     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
611
612     end_idx = start_idx + count;
613     if (end_idx > GL_LIMITS(vshader_constantsF)) {
614         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
615         end_idx = GL_LIMITS(vshader_constantsF);
616     }
617
618     for (i = start_idx; i < end_idx; ++i) {
619         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
620         if (!lconst) return E_OUTOFMEMORY;
621
622         lconst->idx = i;
623         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
624         list_add_head(&This->baseShader.constantsF, &lconst->entry);
625     }
626
627     return WINED3D_OK;
628 }
629
630 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
631     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
632     CONST DWORD *function = This->baseShader.function;
633
634     TRACE("(%p) : function %p\n", iface, function);
635
636     /* We're already compiled. */
637     if (This->baseShader.is_compiled) return WINED3D_OK;
638
639     /* We don't need to compile */
640     if (!function) {
641         This->baseShader.is_compiled = TRUE;
642         return WINED3D_OK;
643     }
644
645     /* Generate the HW shader */
646     TRACE("(%p) : Generating hardware program\n", This);
647     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
648
649     This->baseShader.is_compiled = TRUE;
650
651     return WINED3D_OK;
652 }
653
654 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
655 {
656     /*** IUnknown methods ***/
657     IWineD3DVertexShaderImpl_QueryInterface,
658     IWineD3DVertexShaderImpl_AddRef,
659     IWineD3DVertexShaderImpl_Release,
660     /*** IWineD3DBase methods ***/
661     IWineD3DVertexShaderImpl_GetParent,
662     /*** IWineD3DBaseShader methods ***/
663     IWineD3DVertexShaderImpl_SetFunction,
664     IWineD3DVertexShaderImpl_CompileShader,
665     /*** IWineD3DVertexShader methods ***/
666     IWineD3DVertexShaderImpl_GetDevice,
667     IWineD3DVertexShaderImpl_GetFunction,
668     IWineD3DVertexShaderImpl_FakeSemantics,
669     IWIneD3DVertexShaderImpl_SetLocalConstantsF
670 };