mshtml: Fixed handling channels without container and necko channel.
[wine] / dlls / wined3d / vertexshader.c
1 /*
2  * shaders implementation
3  *
4  * Copyright 2002-2003 Jason Edmeades
5  * Copyright 2002-2003 Raphael Junqueira
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2006 Ivan Gyurdiev
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24
25 #include "config.h"
26
27 #include <math.h>
28 #include <stdio.h>
29
30 #include "wined3d_private.h"
31
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
33
/* Expands to the gl_info of the adapter behind the shader's device. The
 * expansion refers to a variable named "This", so it can only be used inside
 * functions that have a local "This" with a baseShader.device member. */
#define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
35
/* Shader debugging - change the "#if 0" below to "#if 1" to enable debugging
 * of software vertex shaders. When disabled, both macros expand to nothing. */
#if 0 /* Must not be 1 in cvs version */
# define VSTRACE(A) TRACE A
# define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
#else
# define VSTRACE(A)
# define TRACE_VSVECTOR(name)
#endif
45
46 /**
47  * DirectX9 SDK download
48  *  http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
49  *
50  * Exploring D3DX
51  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
52  *
53  * Using Vertex Shaders
54  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
55  *
56  * Dx9 New
57  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
58  *
59  * Dx9 Shaders
60  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
64  *
65  * Dx9 D3DX
66  *  http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
67  *
68  * FVF
69  *  http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
70  *
71  * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72  *  http://developer.nvidia.com/view.asp?IO=vstovp
73  *
74  * NVIDIA: Memory Management with VAR
75  *  http://developer.nvidia.com/view.asp?IO=var_memory_management
76  */
77
/* TODO: Vertex and pixel shaders are almost identical; the only exception is the way that some of the data is looked up, or the availability of some of the data, i.e. some instructions are only valid for pixel shaders and some only for vertex shaders.
 * Because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs could be cross-compiled using a large body of shared code. */
80
/* Sentinel stored in the third (GL name) column of the opcode table below
 * instead of a real string. NOTE(review): judging by the name it marks
 * instructions that can only be translated by the GLSL backend - confirm
 * against the code that consumes the table. */
#define GLNAME_REQUIRE_GLSL  ((const char *)1)

/* Vertex shader opcode table, terminated by an all-zero/NULL entry.
 *
 * Each entry holds, in order: the WINED3DSIO_* opcode, the D3D assembly
 * mnemonic, a GL name (an ARB program mnemonic for most entries, otherwise
 * NULL, "undefined" or GLNAME_REQUIRE_GLSL), two small integers, the ARB
 * program generator callback, the GLSL generator callback, and a pair of
 * shader versions.
 *
 * NOTE(review): the two integers presumably are "has a destination token" and
 * the total parameter count, and the version pair presumably is the
 * minimum/maximum shader version the entry applies to (0/0 or -1 meaning
 * unrestricted) - confirm against the SHADER_OPCODE declaration. */
CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
    /* This table is not order or position dependent. */

    /* Arithmetic */
    {WINED3DSIO_NOP,    "nop",  "NOP", 0, 0, vshader_hw_map2gl,   NULL, 0, 0},
    {WINED3DSIO_MOV,    "mov",  "MOV", 1, 2, vshader_hw_map2gl,   shader_glsl_mov, 0, 0},
    {WINED3DSIO_MOVA,   "mova",  NULL, 1, 2, vshader_hw_map2gl,   shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_ADD,    "add",  "ADD", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
    {WINED3DSIO_SUB,    "sub",  "SUB", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
    {WINED3DSIO_MAD,    "mad",  "MAD", 1, 4, vshader_hw_map2gl,   shader_glsl_mad, 0, 0},
    {WINED3DSIO_MUL,    "mul",  "MUL", 1, 3, vshader_hw_map2gl,   shader_glsl_arith, 0, 0},
    {WINED3DSIO_RCP,    "rcp",  "RCP", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rcp, 0, 0},
    {WINED3DSIO_RSQ,    "rsq",  "RSQ", 1, 2, vshader_hw_rsq_rcp,  shader_glsl_rsq, 0, 0},
    {WINED3DSIO_DP3,    "dp3",  "DP3", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
    {WINED3DSIO_DP4,    "dp4",  "DP4", 1, 3, vshader_hw_map2gl,   shader_glsl_dot, 0, 0},
    {WINED3DSIO_MIN,    "min",  "MIN", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_MAX,    "max",  "MAX", 1, 3, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_SLT,    "slt",  "SLT", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
    {WINED3DSIO_SGE,    "sge",  "SGE", 1, 3, vshader_hw_map2gl,   shader_glsl_compare, 0, 0},
    {WINED3DSIO_ABS,    "abs",  "ABS", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_EXP,    "exp",  "EX2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_LOG,    "log",  "LG2", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_EXPP,   "expp", "EXP", 1, 2, vshader_hw_map2gl,   shader_glsl_expp, 0, 0},
    {WINED3DSIO_LOGP,   "logp", "LOG", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_LIT,    "lit",  "LIT", 1, 2, vshader_hw_map2gl,   shader_glsl_lit, 0, 0},
    {WINED3DSIO_DST,    "dst",  "DST", 1, 3, vshader_hw_map2gl,   shader_glsl_dst, 0, 0},
    {WINED3DSIO_LRP,    "lrp",  "LRP", 1, 4, NULL,                shader_glsl_lrp, 0, 0},
    {WINED3DSIO_FRC,    "frc",  "FRC", 1, 2, vshader_hw_map2gl,   shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_POW,    "pow",  "POW", 1, 3, vshader_hw_map2gl,   shader_glsl_pow, 0, 0},
    {WINED3DSIO_CRS,    "crs",  "XPD", 1, 3, vshader_hw_map2gl,   shader_glsl_cross, 0, 0},
    /* TODO: sgn can possibly be performed as
        RCP tmp, vec
        MUL out, tmp, vec*/
    {WINED3DSIO_SGN,  "sgn",  NULL,  1, 2, NULL,                shader_glsl_map2gl, 0, 0},
    {WINED3DSIO_NRM,    "nrm",      NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
    /* sincos is listed twice: once for versions 2.0-2.1 and once for 3.0 and
     * up, with different parameter counts. */
    {WINED3DSIO_SINCOS, "sincos",   NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
    {WINED3DSIO_SINCOS, "sincos",  "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
    /* Matrix */
    {WINED3DSIO_M4x4,   "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    {WINED3DSIO_M4x3,   "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    {WINED3DSIO_M3x4,   "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    {WINED3DSIO_M3x3,   "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    {WINED3DSIO_M3x2,   "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
    /* Declare registers */
    {WINED3DSIO_DCL,    "dcl",      NULL,                0, 2, NULL, NULL, 0, 0},
    /* Constant definitions */
    {WINED3DSIO_DEF,    "def",      NULL,                1, 5, NULL, NULL, 0, 0},
    {WINED3DSIO_DEFB,   "defb",     GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
    {WINED3DSIO_DEFI,   "defi",     GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
    /* Flow control - requires GLSL or software shaders */
    {WINED3DSIO_REP ,   "rep",      NULL, 0, 1, NULL, shader_glsl_rep,    WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_ENDREP, "endrep",   NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_IF,     "if",       NULL, 0, 1, NULL, shader_glsl_if,     WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_IFC,    "ifc",      NULL, 0, 2, NULL, shader_glsl_ifc,    WINED3DVS_VERSION(2,1), -1},
    {WINED3DSIO_ELSE,   "else",     NULL, 0, 0, NULL, shader_glsl_else,   WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_ENDIF,  "endif",    NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_BREAK,  "break",    NULL, 0, 0, NULL, shader_glsl_break,  WINED3DVS_VERSION(2,1), -1},
    {WINED3DSIO_BREAKC, "breakc",   NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
    {WINED3DSIO_BREAKP, "breakp",   GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
    {WINED3DSIO_CALL,   "call",     NULL, 0, 1, NULL, shader_glsl_call,   WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_CALLNZ, "callnz",   NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_LOOP,   "loop",     NULL, 0, 2, NULL, shader_glsl_loop,   WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_RET,    "ret",      NULL, 0, 0, NULL, NULL,               WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_ENDLOOP,"endloop",  NULL, 0, 0, NULL, shader_glsl_end,    WINED3DVS_VERSION(2,0), -1},
    {WINED3DSIO_LABEL,  "label",    NULL, 0, 1, NULL, shader_glsl_label,  WINED3DVS_VERSION(2,0), -1},

    {WINED3DSIO_SETP,   "setp",     GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
    {WINED3DSIO_TEXLDL, "texldl",   NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
    /* End-of-table marker */
    {0,                 NULL,       NULL,                0, 0, NULL, NULL, 0, 0}
};
153
154 static void vshader_set_limits(
155       IWineD3DVertexShaderImpl *This) {
156
157       This->baseShader.limits.texcoord = 0;
158       This->baseShader.limits.attributes = 16;
159       This->baseShader.limits.packed_input = 0;
160
161       /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
162       This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
163
164       switch (This->baseShader.hex_version) {
165           case WINED3DVS_VERSION(1,0):
166           case WINED3DVS_VERSION(1,1):
167                    This->baseShader.limits.temporary = 12;
168                    This->baseShader.limits.constant_bool = 0;
169                    This->baseShader.limits.constant_int = 0;
170                    This->baseShader.limits.address = 1;
171                    This->baseShader.limits.packed_output = 0;
172                    This->baseShader.limits.sampler = 0;
173                    This->baseShader.limits.label = 0;
174                    break;
175       
176           case WINED3DVS_VERSION(2,0):
177           case WINED3DVS_VERSION(2,1):
178                    This->baseShader.limits.temporary = 12;
179                    This->baseShader.limits.constant_bool = 16;
180                    This->baseShader.limits.constant_int = 16;
181                    This->baseShader.limits.address = 1;
182                    This->baseShader.limits.packed_output = 0;
183                    This->baseShader.limits.sampler = 0;
184                    This->baseShader.limits.label = 16;
185                    break;
186
187           case WINED3DVS_VERSION(3,0):
188                    This->baseShader.limits.temporary = 32;
189                    This->baseShader.limits.constant_bool = 32;
190                    This->baseShader.limits.constant_int = 32;
191                    This->baseShader.limits.address = 1;
192                    This->baseShader.limits.packed_output = 12;
193                    This->baseShader.limits.sampler = 4;
194                    This->baseShader.limits.label = 16; /* FIXME: 2048 */
195                    break;
196
197           default: This->baseShader.limits.temporary = 12;
198                    This->baseShader.limits.constant_bool = 16;
199                    This->baseShader.limits.constant_int = 16;
200                    This->baseShader.limits.address = 1;
201                    This->baseShader.limits.packed_output = 0;
202                    This->baseShader.limits.sampler = 0;
203                    This->baseShader.limits.label = 16;
204                    FIXME("Unrecognized vertex shader version %#x\n",
205                        This->baseShader.hex_version);
206       }
207 }
208
209 /* This is an internal function,
210  * used to create fake semantics for shaders
211  * that don't have them - d3d8 shaders where the declaration
212  * stores the register for each input
213  */
214 static void vshader_set_input(
215     IWineD3DVertexShaderImpl* This,
216     unsigned int regnum,
217     BYTE usage, BYTE usage_idx) {
218
219     /* Fake usage: set reserved bit, usage, usage_idx */
220     DWORD usage_token = (0x1 << 31) |
221         (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
222
223     /* Fake register; set reserved bit, regnum, type: input, wmask: all */
224     DWORD reg_token = (0x1 << 31) |
225         WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
226
227     This->semantics_in[regnum].usage = usage_token;
228     This->semantics_in[regnum].reg = reg_token;
229 }
230
231 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
232     if (usage_idx1 != usage_idx2) return FALSE;
233     if (usage1 == usage2) return TRUE;
234     if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
235     if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
236
237     return FALSE;
238 }
239
240 BOOL vshader_get_input(
241     IWineD3DVertexShader* iface,
242     BYTE usage_req, BYTE usage_idx_req,
243     unsigned int* regnum) {
244
245     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
246     int i;
247
248     for (i = 0; i < MAX_ATTRIBS; i++) {
249         DWORD usage_token = This->semantics_in[i].usage;
250         DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
251         DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
252
253         if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
254             *regnum = i;
255             return TRUE;
256         }
257     }
258     return FALSE;
259 }
260
261 BOOL vshader_input_is_color(
262     IWineD3DVertexShader* iface,
263     unsigned int regnum) {
264
265     IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
266     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
267     IWineD3DVertexDeclarationImpl *vertexDeclaration = (IWineD3DVertexDeclarationImpl *)deviceImpl->stateBlock->vertexDecl;
268
269     DWORD usage_token = This->semantics_in[regnum].usage;
270     DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
271     DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
272
273     if (vertexDeclaration) {
274         int i;
275         /* Find the declaration element that matches our register, then check
276          * if it has D3DCOLOR as it's type. This works for both d3d8 and d3d9. */
277         for (i = 0; i < vertexDeclaration->declarationWNumElements-1; ++i) {
278             WINED3DVERTEXELEMENT *element = vertexDeclaration->pDeclarationWine + i;
279             if (match_usage(element->Usage, element->UsageIndex, usage, usage_idx)) {
280                 return element->Type == WINED3DDECLTYPE_D3DCOLOR;
281             }
282         }
283     }
284
285     ERR("Either no vertexdeclaration present, or register not matched. This should never happen.\n");
286     return FALSE;
287 }
288
289 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
290     or GLSL and send it to the card */
291 static VOID IWineD3DVertexShaderImpl_GenerateShader(
292     IWineD3DVertexShader *iface,
293     shader_reg_maps* reg_maps,
294     CONST DWORD *pFunction) {
295
296     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
297     SHADER_BUFFER buffer;
298
299 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
300         it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
301     if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
302         HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
303         This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
304         This->fixupVertexBufferSize = PGMSIZE;
305         This->fixupVertexBuffer[0] = 0;
306     }
307     buffer.buffer = This->device->fixupVertexBuffer;
308 #else
309     buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE); 
310 #endif
311     buffer.bsize = 0;
312     buffer.lineNo = 0;
313     buffer.newline = TRUE;
314
315     if (This->baseShader.shader_mode == SHADER_GLSL) {
316
317         /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
318         GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
319
320         /* Base Declarations */
321         shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
322
323         /* Base Shader Body */
324         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
325
326         /* Unpack 3.0 outputs */
327         if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0)) {
328             shader_addline(&buffer, "order_ps_input(OUT);\n");
329         } else {
330             shader_addline(&buffer, "order_ps_input();\n");
331         }
332
333         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
334         if (!reg_maps->fog)
335             shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
336
337         /* Write the final position.
338          *
339          * OpenGL coordinates specify the center of the pixel while d3d coords specify
340          * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
341          * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
342          * contains 1.0 to allow a mad.
343          */
344         shader_addline(&buffer, "gl_Position.xy = gl_Position.xy * posFixup.xy + posFixup.zw;\n");
345
346         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
347          *
348          * Basically we want(in homogenous coordinates) z = z * 2 - 1. However, shaders are run
349          * before the homogenous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
350          * which is the same as z = z / 2 - w.
351          */
352         shader_addline(&buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
353
354         shader_addline(&buffer, "}\n");
355
356         TRACE("Compiling shader object %u\n", shader_obj);
357         GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
358         GL_EXTCALL(glCompileShaderARB(shader_obj));
359         print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
360
361         /* Store the shader object */
362         This->baseShader.prgId = shader_obj;
363
364     } else if (This->baseShader.shader_mode == SHADER_ARB) {
365
366         /*  Create the hw ARB shader */
367         shader_addline(&buffer, "!!ARBvp1.0\n");
368         shader_addline(&buffer, "PARAM helper_const = { 2.0, -1.0, %d.0, 0.0 };\n", This->rel_offset);
369
370         /* Mesa supports only 95 constants */
371         if (GL_VEND(MESA) || GL_VEND(WINE))
372             This->baseShader.limits.constant_float = 
373                 min(95, This->baseShader.limits.constant_float);
374
375         /* Some instructions need a temporary register. Add it if needed, but only if it is really needed */
376         if(reg_maps->usesnrm || This->rel_offset) {
377             shader_addline(&buffer, "TEMP TMP;\n");
378         }
379
380         /* Base Declarations */
381         shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
382
383         /* We need a constant to fixup the final position */
384         shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
385
386         /* Base Shader Body */
387         shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
388
389         /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
390         if (!reg_maps->fog)
391             shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
392
393         /* Write the final position.
394          *
395          * OpenGL coordinates specify the center of the pixel while d3d coords specify
396          * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
397          * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
398          * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
399          */
400         shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, posFixup.z;\n");
401         shader_addline(&buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, posFixup.w;\n");
402
403         /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
404          * and the glsl equivalent
405          */
406         shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, helper_const.x, -TMP_OUT.w;\n");
407
408         shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
409         
410         shader_addline(&buffer, "END\n"); 
411
412         /* TODO: change to resource.glObjectHandle or something like that */
413         GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
414
415         TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
416         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
417
418         TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
419         /* Create the program and check for errors */
420         GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
421             buffer.bsize, buffer.buffer));
422
423         if (glGetError() == GL_INVALID_OPERATION) {
424             GLint errPos;
425             glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
426             FIXME("HW VertexShader Error at position %d: %s\n",
427                   errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
428             This->baseShader.prgId = -1;
429         }
430     }
431
432 #if 1 /* if were using the data buffer of device then we don't need to free it */
433   HeapFree(GetProcessHeap(), 0, buffer.buffer);
434 #endif
435 }
436
437 /* *******************************************
438    IWineD3DVertexShader IUnknown parts follow
439    ******************************************* */
440 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
441 {
442     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
443     TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
444     if (IsEqualGUID(riid, &IID_IUnknown) 
445         || IsEqualGUID(riid, &IID_IWineD3DBase)
446         || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
447         || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
448         IUnknown_AddRef(iface);
449         *ppobj = This;
450         return S_OK;
451     }
452     *ppobj = NULL;
453     return E_NOINTERFACE;
454 }
455
456 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
457     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
458     TRACE("(%p) : AddRef increasing from %d\n", This, This->ref);
459     return InterlockedIncrement(&This->ref);
460 }
461
462 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
463     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
464     ULONG ref;
465     TRACE("(%p) : Releasing from %d\n", This, This->ref);
466     ref = InterlockedDecrement(&This->ref);
467     if (ref == 0) {
468         if (This->baseShader.shader_mode == SHADER_GLSL && This->baseShader.prgId != 0) {
469             struct list *linked_programs = &This->baseShader.linked_programs;
470
471             TRACE("Deleting linked programs\n");
472             if (linked_programs->next) {
473                 struct glsl_shader_prog_link *entry, *entry2;
474                 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
475                     delete_glsl_program_entry(This->baseShader.device, entry);
476                 }
477             }
478
479             TRACE("Deleting shader object %u\n", This->baseShader.prgId);
480             GL_EXTCALL(glDeleteObjectARB(This->baseShader.prgId));
481             checkGLcall("glDeleteObjectARB");
482         }
483         shader_delete_constant_list(&This->baseShader.constantsF);
484         shader_delete_constant_list(&This->baseShader.constantsB);
485         shader_delete_constant_list(&This->baseShader.constantsI);
486         HeapFree(GetProcessHeap(), 0, This);
487
488     }
489     return ref;
490 }
491
492 /* *******************************************
493    IWineD3DVertexShader IWineD3DVertexShader parts follow
494    ******************************************* */
495
496 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
497     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
498     
499     *parent = This->parent;
500     IUnknown_AddRef(*parent);
501     TRACE("(%p) : returning %p\n", This, *parent);
502     return WINED3D_OK;
503 }
504
505 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
506     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
507     IWineD3DDevice_AddRef(This->baseShader.device);
508     *pDevice = This->baseShader.device;
509     TRACE("(%p) returning %p\n", This, *pDevice);
510     return WINED3D_OK;
511 }
512
513 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
514     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
515     TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
516
517     if (NULL == pData) {
518         *pSizeOfData = This->baseShader.functionLength;
519         return WINED3D_OK;
520     }
521     if (*pSizeOfData < This->baseShader.functionLength) {
522         /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
523          * than the required size we should write the required size and
524          * return D3DERR_MOREDATA. That's not actually true. */
525         return WINED3DERR_INVALIDCALL;
526     }
527     if (NULL == This->baseShader.function) { /* no function defined */
528         TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
529         (*(DWORD **) pData) = NULL;
530     } else {
531         if(This->baseShader.functionLength == 0){
532
533         }
534         TRACE("(%p) : GetFunction copying to %p\n", This, pData);
535         memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
536     }
537     return WINED3D_OK;
538 }
539
540 /* Note that for vertex shaders CompileShader isn't called until the
541  * shader is first used. The reason for this is that we need the vertex
542  * declaration the shader will be used with in order to determine if
543  * the data in a register is of type D3DCOLOR, and needs swizzling. */
544 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
545
546     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
547     IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
548     HRESULT hr;
549     shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
550
551     TRACE("(%p) : pFunction %p\n", iface, pFunction);
552
553     /* First pass: trace shader */
554     shader_trace_init((IWineD3DBaseShader*) This, pFunction);
555     vshader_set_limits(This);
556
557     /* Initialize immediate constant lists */
558     list_init(&This->baseShader.constantsF);
559     list_init(&This->baseShader.constantsB);
560     list_init(&This->baseShader.constantsI);
561
562     /* Second pass: figure out registers used, semantics, etc.. */
563     This->min_rel_offset = GL_LIMITS(vshader_constantsF);
564     This->max_rel_offset = 0;
565     memset(reg_maps, 0, sizeof(shader_reg_maps));
566     hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
567        This->semantics_in, This->semantics_out, pFunction, NULL);
568     if (hr != WINED3D_OK) return hr;
569
570     This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
571
572     if(deviceImpl->vs_selected_mode == SHADER_ARB &&
573        (GLINFO_LOCATION).arb_vs_offset_limit      &&
574        This->min_rel_offset <= This->max_rel_offset) {
575
576         if(This->max_rel_offset - This->min_rel_offset > 127) {
577             FIXME("The difference between the minimum and maximum relative offset is > 127\n");
578             FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
579             FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
580         } else if(This->max_rel_offset - This->min_rel_offset > 63) {
581             This->rel_offset = This->min_rel_offset + 63;
582         } else if(This->max_rel_offset > 63) {
583             This->rel_offset = This->min_rel_offset;
584         } else {
585             This->rel_offset = 0;
586         }
587     }
588     This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
589
590     /* copy the function ... because it will certainly be released by application */
591     if (NULL != pFunction) {
592         void *function;
593
594         function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
595         if (!function) return E_OUTOFMEMORY;
596         memcpy(function, pFunction, This->baseShader.functionLength);
597         This->baseShader.function = function;
598     } else {
599         This->baseShader.function = NULL;
600     }
601
602     return WINED3D_OK;
603 }
604
605 /* Preload semantics for d3d8 shaders */
606 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
607     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
608     IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
609
610     int i;
611     for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
612         WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
613         vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
614     }
615 }
616
617 /* Set local constants for d3d8 shaders */
618 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
619         UINT start_idx, const float *src_data, UINT count) {
620     IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
621     UINT i, end_idx;
622
623     TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
624
625     end_idx = start_idx + count;
626     if (end_idx > GL_LIMITS(vshader_constantsF)) {
627         WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
628         end_idx = GL_LIMITS(vshader_constantsF);
629     }
630
631     for (i = start_idx; i < end_idx; ++i) {
632         local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
633         if (!lconst) return E_OUTOFMEMORY;
634
635         lconst->idx = i;
636         memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
637         list_add_head(&This->baseShader.constantsF, &lconst->entry);
638     }
639
640     return WINED3D_OK;
641 }
642
643 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
644     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
645     CONST DWORD *function = This->baseShader.function;
646
647     TRACE("(%p) : function %p\n", iface, function);
648
649     /* We're already compiled. */
650     if (This->baseShader.is_compiled) return WINED3D_OK;
651
652     /* We don't need to compile */
653     if (!function) {
654         This->baseShader.is_compiled = TRUE;
655         return WINED3D_OK;
656     }
657
658     /* Generate the HW shader */
659     TRACE("(%p) : Generating hardware program\n", This);
660     IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
661
662     This->baseShader.is_compiled = TRUE;
663
664     return WINED3D_OK;
665 }
666
/* Method table for the vertex shader object, wiring the implementation
 * functions above to the IWineD3DVertexShader interface. The order of the
 * entries must follow the method order of the IWineD3DVertexShaderVtbl
 * declaration. */
const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
{
    /*** IUnknown methods ***/
    IWineD3DVertexShaderImpl_QueryInterface,
    IWineD3DVertexShaderImpl_AddRef,
    IWineD3DVertexShaderImpl_Release,
    /*** IWineD3DBase methods ***/
    IWineD3DVertexShaderImpl_GetParent,
    /*** IWineD3DBaseShader methods ***/
    IWineD3DVertexShaderImpl_SetFunction,
    IWineD3DVertexShaderImpl_CompileShader,
    /*** IWineD3DVertexShader methods ***/
    IWineD3DVertexShaderImpl_GetDevice,
    IWineD3DVertexShaderImpl_GetFunction,
    IWineD3DVertexShaderImpl_FakeSemantics,
    IWIneD3DVertexShaderImpl_SetLocalConstantsF
};