kernel32: Add stub for GetConsoleKeyboardLayoutNameW.
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24
25 #include "config.h"
26
27 #include <math.h>
28 #include <stdio.h>
29
30 #include "wined3d_private.h"
31
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
33
34 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
35
36 /* Shader debugging - Change the following line to enable debugging of software
37       vertex shaders                                                             */
38 #if 0 /* Must not be 1 in cvs version */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
41 #else
42 # define VSTRACE(A)
43 # define TRACE_VSVECTOR(name)
44 #endif
45
46 /**
47  * DirectX9 SDK download
48  *  http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
49  *
50  * Exploring D3DX
51  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
52  *
53  * Using Vertex Shaders
54  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
55  *
56  * Dx9 New
57  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
58  *
59  * Dx9 Shaders
60  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
64  *
65  * Dx9 D3DX
66  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
67  *
68  * FVF
69  *  http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
70  *
71  * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72  *  http://developer.nvidia.com/view.asp?IO=vstovp
73  *
74  * NVIDIA: Memory Management with VAR
75  *  http://developer.nvidia.com/view.asp?IO=var_memory_management
76  */
77
78 /* TODO: Vertex and Pixel shaders are almost identical, the only exception being the way that some of the data is looked up or the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders.
79 Because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross compiled using a large body of shared code */
80
81 #define GLNAME_REQUIRE_GLSL  ((const char *)1)
82
/* Opcode table for vertex shaders.
 *
 * Each entry names one WINED3DSIO_* opcode together with its D3D mnemonic,
 * an optional second (upper-case) name, two small integers, a handler from
 * the vshader_hw_ / shader_hw_ family, a handler from the shader_glsl_ family
 * and two version values. The list ends with an all-zero/NULL sentinel entry.
 *
 * NOTE(review): the precise meaning of the integer columns and of the
 * GLNAME_REQUIRE_GLSL marker is defined by SHADER_OPCODE and its users, which
 * are not part of this file - confirm there before relying on it. */
CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
    /* This table is not order or position dependent. */

    /* Arithmetic */
    {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, vshader_hw_map2gl,   NULL, 0, 0},
    {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, vshader_hw_map2gl,   shader_glsl_mov, 0, 0},
    {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, vshader_hw_map2gl,   shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
    {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
    {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, vshader_hw_map2gl,   shader_glsl_mad, 0, 0},
    {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
    {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
    {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rsq, 0, 0},
    {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
    {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
    {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
    {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
    {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, vshader_hw_map2gl,   shader_glsl_expp, 0, 0},
    {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, vshader_hw_map2gl,   shader_glsl_lit, 0, 0},
    {WINED3DSIO_DST,    "dst",  "DST", 1, 3, vshader_hw_map2gl,   shader_glsl_dst, 0, 0},
    {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
    {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_POW,    "pow",  "POW", 1, 3, vshader_hw_map2gl,   shader_glsl_pow, 0, 0},
    {WINED3DSIO_CRS,    "crs",  "XPD", 1, 3, vshader_hw_map2gl,   shader_glsl_cross, 0, 0},
    /* TODO: sgn can possibly be performed as
        RCP tmp, vec
        MUL out, tmp, vec */
    {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
    /* sincos is listed twice, with different operand counts per version range */
    {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
    {WINED3DSIO_SINCOS, "sincos",  "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
    /* Matrix */
    {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    /* Declare registers */
    {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
    /* Constant definitions */
    {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
    {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
    {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
    /* Flow control - requires GLSL or software shaders */
    {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
    {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
    {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
    {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
    {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},

    {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
    {WINED3DSIO_TEXLDL, "texldl",   NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
    /* End-of-table sentinel */
    {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
};
153
154 static void vshader_set_limits(
155       IWineD3DVertexShaderImpl *This) {
156
157       This->baseShader.limits.texcoord = 0;
158       This->baseShader.limits.attributes = 16;
159       This->baseShader.limits.packed_input = 0;
160
161       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
162       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
163
164       switch (This->baseShader.hex_version) {
165           case WINED3DVS_VERSION(1,0):
166           case WINED3DVS_VERSION(1,1):
167                    This->baseShader.limits.temporary = 12;
168                    This->baseShader.limits.constant_bool = 0;
169                    This->baseShader.limits.constant_int = 0;
170                    This->baseShader.limits.address = 1;
171                    This->baseShader.limits.packed_output = 0;
172                    This->baseShader.limits.sampler = 0;
173                    This->baseShader.limits.label = 0;
174                    break;
175       
176           case WINED3DVS_VERSION(2,0):
177           case WINED3DVS_VERSION(2,1):
178                    This->baseShader.limits.temporary = 12;
179                    This->baseShader.limits.constant_bool = 16;
180                    This->baseShader.limits.constant_int = 16;
181                    This->baseShader.limits.address = 1;
182                    This->baseShader.limits.packed_output = 0;
183                    This->baseShader.limits.sampler = 0;
184                    This->baseShader.limits.label = 16;
185                    break;
186
187           case WINED3DVS_VERSION(3,0):
188                    This->baseShader.limits.temporary = 32;
189                    This->baseShader.limits.constant_bool = 32;
190                    This->baseShader.limits.constant_int = 32;
191                    This->baseShader.limits.address = 1;
192                    This->baseShader.limits.packed_output = 12;
193                    This->baseShader.limits.sampler = 4;
194                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
195                    break;
196
197           default: This->baseShader.limits.temporary = 12;
198                    This->baseShader.limits.constant_bool = 16;
199                    This->baseShader.limits.constant_int = 16;
200                    This->baseShader.limits.address = 1;
201                    This->baseShader.limits.packed_output = 0;
202                    This->baseShader.limits.sampler = 0;
203                    This->baseShader.limits.label = 16;
204                    FIXME("Unrecognized vertex shader version %#x\n",
205                        This->baseShader.hex_version);
206       }
207 }
208
209 /* This is an internal function,
210  * used to create fake semantics for shaders
211  * that don't have them - d3d8 shaders where the declaration
212  * stores the register for each input
213  */
214 static void vshader_set_input(
215     IWineD3DVertexShaderImpl* This,
216     unsigned int regnum,
217     BYTE usage, BYTE usage_idx) {
218
219     /* Fake usage: set reserved bit, usage, usage_idx */
220     DWORD usage_token = (0x1 << 31) |
221         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
222
223     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
224     DWORD reg_token = (0x1 << 31) |
225         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
226
227     This->semantics_in[regnum].usage = usage_token;
228     This->semantics_in[regnum].reg = reg_token;
229 }
230
231 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
232     if (usage_idx1 != usage_idx2) return FALSE;
233     if (usage1 == usage2) return TRUE;
234     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
235     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
236
237     return FALSE;
238 }
239
240 BOOL vshader_get_input(
241     IWineD3DVertexShader* iface,
242     BYTE usage_req, BYTE usage_idx_req,
243     unsigned int* regnum) {
244
245     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
246     int i;
247
248     for (i = 0; i < MAX_ATTRIBS; i++) {
249         DWORD usage_token = This->semantics_in[i].usage;
250         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
251         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
252
253         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
254             *regnum = i;
255             return TRUE;
256         }
257     }
258     return FALSE;
259 }
260
261 BOOL vshader_input_is_color(
262     IWineD3DVertexShader* iface,
263     unsigned int regnum) {
264
265     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
266     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
267     IWineD3DVertexDeclarationImpl *vertexDeclaration = (IWineD3DVertexDeclarationImpl *)deviceImpl->stateBlock->vertexDecl;
268
269     DWORD usage_token = This->semantics_in[regnum].usage;
270     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
271     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
272
273     if (vertexDeclaration) {
274         int i;
275         /* Find the declaration element that matches our register, then check
276          * if it has D3DCOLOR as it's type. This works for both d3d8 and d3d9. */
277         for (i = 0; i < vertexDeclaration->declarationWNumElements-1; ++i) {
278             WINED3DVERTEXELEMENT *element = vertexDeclaration->pDeclarationWine + i;
279             if (match_usage(element->Usage, element->UsageIndex, usage, usage_idx)) {
280                 return element->Type == WINED3DDECLTYPE_D3DCOLOR;
281             }
282         }
283     }
284
285     ERR("Either no vertexdeclaration present, or register not matched. This should never happen.\n");
286     return FALSE;
287 }
288
289 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
290     or GLSL and send it to the card */
291 static VOID IWineD3DVertexShaderImpl_GenerateShader(
292     IWineD3DVertexShader *iface,
293     shader_reg_maps* reg_maps,
294     CONST DWORD *pFunction) {
295
296     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
297     SHADER_BUFFER buffer;
298
299 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
300         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
301     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
302         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
303         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
304         This->fixupVertexBufferSize = PGMSIZE;
305         This->fixupVertexBuffer[0] = 0;
306     }
307     buffer.buffer = This->device->fixupVertexBuffer;
308 #else
309     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
310 #endif
311     buffer.bsize = 0;
312     buffer.lineNo = 0;
313     buffer.newline = TRUE;
314
315     if (This->baseShader.shader_mode == SHADER_GLSL) {
316
317         /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
318         GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
319
320         /* Base Declarations */
321         shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
322
323         /* Base Shader Body */
324         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
325
326         /* Unpack 3.0 outputs */
327         if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0)) {
328             shader_addline(&buffer, "order_ps_input(OUT);\n");
329         } else {
330             shader_addline(&buffer, "order_ps_input();\n");
331         }
332
333         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
334         if (!reg_maps->fog)
335             shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
336
337         /* Write the final position.
338          *
339          * OpenGL coordinates specify the center of the pixel while d3d coords specify
340          * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
341          * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
342          * contains 1.0 to allow a mad.
343          */
344         shader_addline(&buffer, "gl_Position.xy = gl_Position.xy * posFixup.xy + posFixup.zw;\n");
345
346         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
347          *
348          * Basically we want(in homogenous coordinates) z = z * 2 - 1. However, shaders are run
349          * before the homogenous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
350          * which is the same as z = z / 2 - w.
351          */
352         shader_addline(&buffer, "tmp0 = gl_Position;\n");
353         shader_addline(&buffer, "gl_Position.z = tmp0.z * 2.0;\n");
354         shader_addline(&buffer, "gl_Position.z = gl_Position.z - gl_Position.w;\n");
355
356         shader_addline(&buffer, "}\n");
357
358         TRACE("Compiling shader object %u\n", shader_obj);
359         GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
360         GL_EXTCALL(glCompileShaderARB(shader_obj));
361         print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
362
363         /* Store the shader object */
364         This->baseShader.prgId = shader_obj;
365
366     } else if (This->baseShader.shader_mode == SHADER_ARB) {
367
368         /*  Create the hw ARB shader */
369         shader_addline(&buffer, "!!ARBvp1.0\n");
370         shader_addline(&buffer, "PARAM helper_const = { 2.0, -1.0, %d.0, 0.0 };\n", This->rel_offset);
371
372         /* Mesa supports only 95 constants */
373         if (GL_VEND(MESA) || GL_VEND(WINE))
374             This->baseShader.limits.constant_float = 
375                 min(95, This->baseShader.limits.constant_float);
376
377         /* Some instructions need a temporary register. Add it if needed, but only if it is really needed */
378         if(reg_maps->usesnrm || This->rel_offset) {
379             shader_addline(&buffer, "TEMP TMP;\n");
380         }
381
382         /* Base Declarations */
383         shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
384
385         /* We need a constant to fixup the final position */
386         shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
387
388         /* Base Shader Body */
389         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
390
391         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
392         if (!reg_maps->fog)
393             shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
394
395         /* Write the final position.
396          *
397          * OpenGL coordinates specify the center of the pixel while d3d coords specify
398          * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
399          * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
400          * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
401          */
402         shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, posFixup.z;\n");
403         shader_addline(&buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, posFixup.w;\n");
404
405         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
406          * and the glsl equivalent
407          */
408         shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, helper_const.x, -TMP_OUT.w;\n");
409
410         shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
411         
412         shader_addline(&buffer, "END\n"); 
413
414         /* TODO: change to resource.glObjectHandle or something like that */
415         GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
416
417         TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
418         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
419
420         TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
421         /* Create the program and check for errors */
422         GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
423             buffer.bsize, buffer.buffer));
424
425         if (glGetError() == GL_INVALID_OPERATION) {
426             GLint errPos;
427             glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
428             FIXME("HW VertexShader Error at position %d: %s\n",
429                   errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
430             This->baseShader.prgId = -1;
431         }
432     }
433
434 #if 1 /* if were using the data buffer of device then we don't need to free it */
435   HeapFree(GetProcessHeap(), 0, buffer.buffer);
436 #endif
437 }
438
439 /* *******************************************
440    IWineD3DVertexShader IUnknown parts follow
441    ******************************************* */
442 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
443 {
444     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
445     TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
446     if (IsEqualGUID(riid, &IID_IUnknown) 
447         || IsEqualGUID(riid, &IID_IWineD3DBase)
448         || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
449         || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
450         IUnknown_AddRef(iface);
451         *ppobj = This;
452         return S_OK;
453     }
454     *ppobj = NULL;
455     return E_NOINTERFACE;
456 }
457
458 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
459     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
460     TRACE("(%p) : AddRef increasing from %d\n", This, This->ref);
461     return InterlockedIncrement(&This->ref);
462 }
463
464 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
465     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
466     ULONG ref;
467     TRACE("(%p) : Releasing from %d\n", This, This->ref);
468     ref = InterlockedDecrement(&This->ref);
469     if (ref == 0) {
470         if (This->baseShader.shader_mode == SHADER_GLSL && This->baseShader.prgId != 0) {
471             struct list *linked_programs = &This->baseShader.linked_programs;
472
473             TRACE("Deleting linked programs\n");
474             if (linked_programs->next) {
475                 struct glsl_shader_prog_link *entry, *entry2;
476                 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
477                     delete_glsl_program_entry(This->baseShader.device, entry);
478                 }
479             }
480
481             TRACE("Deleting shader object %u\n", This->baseShader.prgId);
482             GL_EXTCALL(glDeleteObjectARB(This->baseShader.prgId));
483             checkGLcall("glDeleteObjectARB");
484         }
485         shader_delete_constant_list(&This->baseShader.constantsF);
486         shader_delete_constant_list(&This->baseShader.constantsB);
487         shader_delete_constant_list(&This->baseShader.constantsI);
488         HeapFree(GetProcessHeap(), 0, This);
489
490     }
491     return ref;
492 }
493
494 /* *******************************************
495    IWineD3DVertexShader IWineD3DVertexShader parts follow
496    ******************************************* */
497
498 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
499     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
500     
501     *parent = This->parent;
502     IUnknown_AddRef(*parent);
503     TRACE("(%p) : returning %p\n", This, *parent);
504     return WINED3D_OK;
505 }
506
507 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
508     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
509     IWineD3DDevice_AddRef(This->baseShader.device);
510     *pDevice = This->baseShader.device;
511     TRACE("(%p) returning %p\n", This, *pDevice);
512     return WINED3D_OK;
513 }
514
515 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
516     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
517     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
518
519     if (NULL == pData) {
520         *pSizeOfData = This->baseShader.functionLength;
521         return WINED3D_OK;
522     }
523     if (*pSizeOfData < This->baseShader.functionLength) {
524         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
525          * than the required size we should write the required size and
526          * return D3DERR_MOREDATA. That's not actually true. */
527         return WINED3DERR_INVALIDCALL;
528     }
529     if (NULL == This->baseShader.function) { /* no function defined */
530         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
531         (*(DWORD **) pData) = NULL;
532     } else {
533         if(This->baseShader.functionLength == 0){
534
535         }
536         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
537         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
538     }
539     return WINED3D_OK;
540 }
541
542 /* Note that for vertex shaders CompileShader isn't called until the
543  * shader is first used. The reason for this is that we need the vertex
544  * declaration the shader will be used with in order to determine if
545  * the data in a register is of type D3DCOLOR, and needs swizzling. */
546 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
547
548     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
549     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
550     HRESULT hr;
551     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
552
553     TRACE("(%p) : pFunction %p\n", iface, pFunction);
554
555     /* First pass: trace shader */
556     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
557     vshader_set_limits(This);
558
559     /* Initialize immediate constant lists */
560     list_init(&This->baseShader.constantsF);
561     list_init(&This->baseShader.constantsB);
562     list_init(&This->baseShader.constantsI);
563
564     /* Second pass: figure out registers used, semantics, etc.. */
565     This->min_rel_offset = GL_LIMITS(vshader_constantsF);
566     This->max_rel_offset = 0;
567     memset(reg_maps, 0, sizeof(shader_reg_maps));
568     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
569        This->semantics_in, This->semantics_out, pFunction, NULL);
570     if (hr != WINED3D_OK) return hr;
571
572     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
573
574     if(deviceImpl->vs_selected_mode == SHADER_ARB &&
575        (GLINFO_LOCATION).arb_vs_offset_limit      &&
576        This->min_rel_offset <= This->max_rel_offset) {
577
578         if(This->max_rel_offset - This->min_rel_offset > 127) {
579             FIXME("The difference between the minimum and maximum relative offset is > 127\n");
580             FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
581             FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
582         } else if(This->max_rel_offset - This->min_rel_offset > 63) {
583             This->rel_offset = This->min_rel_offset + 63;
584         } else if(This->max_rel_offset > 63) {
585             This->rel_offset = This->min_rel_offset;
586         } else {
587             This->rel_offset = 0;
588         }
589     }
590
591     /* copy the function ... because it will certainly be released by application */
592     if (NULL != pFunction) {
593         void *function;
594
595         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
596         if (!function) return E_OUTOFMEMORY;
597         memcpy(function, pFunction, This->baseShader.functionLength);
598         This->baseShader.function = function;
599     } else {
600         This->baseShader.function = NULL;
601     }
602
603     return WINED3D_OK;
604 }
605
606 /* Preload semantics for d3d8 shaders */
607 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
608     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
609     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
610
611     int i;
612     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
613         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
614         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
615     }
616 }
617
618 /* Set local constants for d3d8 shaders */
619 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
620         UINT start_idx, const float *src_data, UINT count) {
621     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
622     UINT i, end_idx;
623
624     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
625
626     end_idx = start_idx + count;
627     if (end_idx > GL_LIMITS(vshader_constantsF)) {
628         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
629         end_idx = GL_LIMITS(vshader_constantsF);
630     }
631
632     for (i = start_idx; i < end_idx; ++i) {
633         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
634         if (!lconst) return E_OUTOFMEMORY;
635
636         lconst->idx = i;
637         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
638         list_add_head(&This->baseShader.constantsF, &lconst->entry);
639     }
640
641     return WINED3D_OK;
642 }
643
644 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
645     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
646     CONST DWORD *function = This->baseShader.function;
647
648     TRACE("(%p) : function %p\n", iface, function);
649
650     /* We're already compiled. */
651     if (This->baseShader.is_compiled) return WINED3D_OK;
652
653     /* We don't need to compile */
654     if (!function) {
655         This->baseShader.is_compiled = TRUE;
656         return WINED3D_OK;
657     }
658
659     /* Generate the HW shader */
660     TRACE("(%p) : Generating hardware program\n", This);
661     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
662
663     This->baseShader.is_compiled = TRUE;
664
665     return WINED3D_OK;
666 }
667
/* Method table of the vertex shader object. The initializer is positional,
 * so the entries must stay in the order in which IWineD3DVertexShaderVtbl
 * declares its members. */
const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
{
    /*** IUnknown methods ***/
    IWineD3DVertexShaderImpl_QueryInterface,
    IWineD3DVertexShaderImpl_AddRef,
    IWineD3DVertexShaderImpl_Release,
    /*** IWineD3DBase methods ***/
    IWineD3DVertexShaderImpl_GetParent,
    /*** IWineD3DBaseShader methods ***/
    IWineD3DVertexShaderImpl_SetFunction,
    IWineD3DVertexShaderImpl_CompileShader,
    /*** IWineD3DVertexShader methods ***/
    IWineD3DVertexShaderImpl_GetDevice,
    IWineD3DVertexShaderImpl_GetFunction,
    IWineD3DVertexShaderImpl_FakeSemantics,
    /* The "IWIne" capitalisation matches the function's definition above. */
    IWIneD3DVertexShaderImpl_SetLocalConstantsF
};