wined3d: Get rid of a few stack buffers.
[wine] / dlls / wined3d / glsl_shader.c
1 /*
2  * GLSL pixel and vertex shader implementation
3  *
4  * Copyright 2006 Jason Green 
5  * Copyright 2006-2007 Henri Verbeet
6  * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21  */
22
23 /*
24  * D3D shader asm has swizzles on source parameters, and write masks for
25  * destination parameters. GLSL uses swizzles for both. The result of this is
26  * that for example "mov dst.xw, src.zyxw" becomes "dst.xw = src.zw" in GLSL.
27  * Ie, to generate a proper GLSL source swizzle, we need to take the D3D write
28  * mask for the destination parameter into account.
29  */
30
31 #include "config.h"
32 #include <limits.h>
33 #include <stdio.h>
34 #include "wined3d_private.h"
35
36 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
37 WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
38 WINE_DECLARE_DEBUG_CHANNEL(d3d_caps);
39 WINE_DECLARE_DEBUG_CHANNEL(d3d);
40
41 #define GLINFO_LOCATION      (*gl_info)
42
43 #define WINED3D_GLSL_SAMPLE_PROJECTED   0x1
44 #define WINED3D_GLSL_SAMPLE_RECT        0x2
45 #define WINED3D_GLSL_SAMPLE_LOD         0x4
46
/* Text form of a destination operand, split into the register and its mask.
 * NOTE(review): the code that fills these buffers is outside this part of the
 * file; the field meanings below are inferred from the names - confirm. */
typedef struct
{
    /* Register name text (at most 149 characters plus the terminator). */
    char reg_name[150];
    /* Write-mask text (at most 5 characters plus the terminator). */
    char mask_str[6];
} glsl_dst_param_t;
51
/* Text form of a source operand.
 * NOTE(review): the code that fills these buffers is outside this part of the
 * file; the field meanings below are inferred from the names - confirm. */
typedef struct
{
    /* Register name text (at most 149 characters plus the terminator). */
    char reg_name[150];
    /* Complete parameter expression text (at most 99 characters plus the
     * terminator). */
    char param_str[100];
} glsl_src_param_t;
56
57 typedef struct {
58     const char *name;
59     DWORD coord_mask;
60 } glsl_sample_function_t;
61
/* Per-level state kept on the explicit stack used by the constant heap
 * walkers: what still has to be done for the node at that depth. */
enum heap_node_op
{
    /* Try to descend into the left child next. */
    HEAP_NODE_TRAVERSE_LEFT  = 0,
    /* Left side is done, try to descend into the right child next. */
    HEAP_NODE_TRAVERSE_RIGHT = 1,
    /* Both children are done, return to the parent. */
    HEAP_NODE_POP            = 2,
};
68
/* One node of a constant heap: a float constant and the version it was last
 * updated at. */
struct constant_entry
{
    /* Index of the float constant this node refers to. */
    unsigned int idx;
    /* Constant version recorded when the constant was last updated. */
    unsigned int version;
};
74
/* Binary heap of float constants ordered by version (a parent's version is
 * never lower than its children's). The root lives at index 1; the children
 * of node n are 2n and 2n + 1. */
struct constant_heap
{
    /* Heap nodes, indexed from 1. */
    struct constant_entry *entries;
    /* Maps a constant index to the heap index of its node. */
    unsigned int *positions;
    /* Heap index the next newly inserted node will get; valid nodes are
     * those with an index below this value. */
    unsigned int size;
};
81
82 /* GLSL shader private data */
83 struct shader_glsl_priv {
84     struct hash_table_t *glsl_program_lookup;
85     struct glsl_shader_prog_link *glsl_program;
86     struct constant_heap vconst_heap;
87     struct constant_heap pconst_heap;
88     unsigned char *stack;
89     GLhandleARB depth_blt_program[tex_type_count];
90     UINT next_constant_version;
91 };
92
93 /* Struct to maintain data about a linked GLSL program */
94 struct glsl_shader_prog_link {
95     struct list                 vshader_entry;
96     struct list                 pshader_entry;
97     GLhandleARB                 programId;
98     GLhandleARB                 *vuniformF_locations;
99     GLhandleARB                 *puniformF_locations;
100     GLhandleARB                 vuniformI_locations[MAX_CONST_I];
101     GLhandleARB                 puniformI_locations[MAX_CONST_I];
102     GLhandleARB                 posFixup_location;
103     GLhandleARB                 bumpenvmat_location[MAX_TEXTURES];
104     GLhandleARB                 luminancescale_location[MAX_TEXTURES];
105     GLhandleARB                 luminanceoffset_location[MAX_TEXTURES];
106     GLhandleARB                 ycorrection_location;
107     GLenum                      vertex_color_clamp;
108     IWineD3DVertexShader        *vshader;
109     IWineD3DPixelShader         *pshader;
110     struct vs_compile_args      vs_args;
111     struct ps_compile_args      ps_args;
112     UINT                        constant_version;
113 };
114
115 typedef struct {
116     IWineD3DVertexShader        *vshader;
117     IWineD3DPixelShader         *pshader;
118     struct ps_compile_args      ps_args;
119     struct vs_compile_args      vs_args;
120 } glsl_program_key_t;
121
122
123 /** Prints the GLSL info log which will contain error messages if they exist */
124 static void print_glsl_info_log(const WineD3D_GL_Info *gl_info, GLhandleARB obj)
125 {
126     int infologLength = 0;
127     char *infoLog;
128     unsigned int i;
129     BOOL is_spam;
130
131     static const char * const spam[] =
132     {
133         "Vertex shader was successfully compiled to run on hardware.\n",    /* fglrx          */
134         "Fragment shader was successfully compiled to run on hardware.\n",  /* fglrx          */
135         "Fragment shader(s) linked, vertex shader(s) linked. \n ",          /* fglrx, with \n */
136         "Fragment shader(s) linked, vertex shader(s) linked.",              /* fglrx, no \n   */
137         "Vertex shader(s) linked, no fragment shader(s) defined. \n ",      /* fglrx, with \n */
138         "Vertex shader(s) linked, no fragment shader(s) defined.",          /* fglrx, no \n   */
139         "Fragment shader was successfully compiled to run on hardware.\n"
140         "WARNING: 0:2: extension 'GL_ARB_draw_buffers' is not supported",
141         "Fragment shader(s) linked, no vertex shader(s) defined.",          /* fglrx, no \n   */
142         "Fragment shader(s) linked, no vertex shader(s) defined. \n ",      /* fglrx, with \n */
143         "WARNING: 0:2: extension 'GL_ARB_draw_buffers' is not supported\n"  /* MacOS ati      */
144     };
145
146     if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader)) return;
147
148     GL_EXTCALL(glGetObjectParameterivARB(obj,
149                GL_OBJECT_INFO_LOG_LENGTH_ARB,
150                &infologLength));
151
152     /* A size of 1 is just a null-terminated string, so the log should be bigger than
153      * that if there are errors. */
154     if (infologLength > 1)
155     {
156         /* Fglrx doesn't terminate the string properly, but it tells us the proper length.
157          * So use HEAP_ZERO_MEMORY to avoid uninitialized bytes
158          */
159         infoLog = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, infologLength);
160         GL_EXTCALL(glGetInfoLogARB(obj, infologLength, NULL, infoLog));
161         is_spam = FALSE;
162
163         for(i = 0; i < sizeof(spam) / sizeof(spam[0]); i++) {
164             if(strcmp(infoLog, spam[i]) == 0) {
165                 is_spam = TRUE;
166                 break;
167             }
168         }
169         if(is_spam) {
170             TRACE("Spam received from GLSL shader #%u: %s\n", obj, debugstr_a(infoLog));
171         } else {
172             FIXME("Error received from GLSL shader #%u: %s\n", obj, debugstr_a(infoLog));
173         }
174         HeapFree(GetProcessHeap(), 0, infoLog);
175     }
176 }
177
178 /**
179  * Loads (pixel shader) samplers
180  */
181 static void shader_glsl_load_psamplers(const WineD3D_GL_Info *gl_info, IWineD3DStateBlock *iface, GLhandleARB programId)
182 {
183     IWineD3DStateBlockImpl* stateBlock = (IWineD3DStateBlockImpl*) iface;
184     GLhandleARB name_loc;
185     int i;
186     char sampler_name[20];
187
188     for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) {
189         snprintf(sampler_name, sizeof(sampler_name), "Psampler%d", i);
190         name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
191         if (name_loc != -1) {
192             DWORD mapped_unit = stateBlock->wineD3DDevice->texUnitMap[i];
193             if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < GL_LIMITS(fragment_samplers))
194             {
195                 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
196                 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
197                 checkGLcall("glUniform1iARB");
198             } else {
199                 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
200             }
201         }
202     }
203 }
204
205 static void shader_glsl_load_vsamplers(const WineD3D_GL_Info *gl_info, IWineD3DStateBlock *iface, GLhandleARB programId)
206 {
207     IWineD3DStateBlockImpl* stateBlock = (IWineD3DStateBlockImpl*) iface;
208     GLhandleARB name_loc;
209     char sampler_name[20];
210     int i;
211
212     for (i = 0; i < MAX_VERTEX_SAMPLERS; ++i) {
213         snprintf(sampler_name, sizeof(sampler_name), "Vsampler%d", i);
214         name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
215         if (name_loc != -1) {
216             DWORD mapped_unit = stateBlock->wineD3DDevice->texUnitMap[MAX_FRAGMENT_SAMPLERS + i];
217             if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < GL_LIMITS(combined_samplers))
218             {
219                 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
220                 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
221                 checkGLcall("glUniform1iARB");
222             } else {
223                 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
224             }
225         }
226     }
227 }
228
229 static inline void walk_constant_heap(const WineD3D_GL_Info *gl_info, const float *constants,
230         const GLhandleARB *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
231 {
232     int stack_idx = 0;
233     unsigned int heap_idx = 1;
234     unsigned int idx;
235
236     if (heap->entries[heap_idx].version <= version) return;
237
238     idx = heap->entries[heap_idx].idx;
239     if (constant_locations[idx] != -1) GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
240     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
241
242     while (stack_idx >= 0)
243     {
244         /* Note that we fall through to the next case statement. */
245         switch(stack[stack_idx])
246         {
247             case HEAP_NODE_TRAVERSE_LEFT:
248             {
249                 unsigned int left_idx = heap_idx << 1;
250                 if (left_idx < heap->size && heap->entries[left_idx].version > version)
251                 {
252                     heap_idx = left_idx;
253                     idx = heap->entries[heap_idx].idx;
254                     if (constant_locations[idx] != -1)
255                         GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
256
257                     stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
258                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
259                     break;
260                 }
261             }
262
263             case HEAP_NODE_TRAVERSE_RIGHT:
264             {
265                 unsigned int right_idx = (heap_idx << 1) + 1;
266                 if (right_idx < heap->size && heap->entries[right_idx].version > version)
267                 {
268                     heap_idx = right_idx;
269                     idx = heap->entries[heap_idx].idx;
270                     if (constant_locations[idx] != -1)
271                         GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
272
273                     stack[stack_idx++] = HEAP_NODE_POP;
274                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
275                     break;
276                 }
277             }
278
279             case HEAP_NODE_POP:
280             {
281                 heap_idx >>= 1;
282                 --stack_idx;
283                 break;
284             }
285         }
286     }
287     checkGLcall("walk_constant_heap()");
288 }
289
290 static inline void apply_clamped_constant(const WineD3D_GL_Info *gl_info, GLint location, const GLfloat *data)
291 {
292     GLfloat clamped_constant[4];
293
294     if (location == -1) return;
295
296     clamped_constant[0] = data[0] < -1.0f ? -1.0f : data[0] > 1.0 ? 1.0 : data[0];
297     clamped_constant[1] = data[1] < -1.0f ? -1.0f : data[1] > 1.0 ? 1.0 : data[1];
298     clamped_constant[2] = data[2] < -1.0f ? -1.0f : data[2] > 1.0 ? 1.0 : data[2];
299     clamped_constant[3] = data[3] < -1.0f ? -1.0f : data[3] > 1.0 ? 1.0 : data[3];
300
301     GL_EXTCALL(glUniform4fvARB(location, 1, clamped_constant));
302 }
303
304 static inline void walk_constant_heap_clamped(const WineD3D_GL_Info *gl_info, const float *constants,
305         const GLhandleARB *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
306 {
307     int stack_idx = 0;
308     unsigned int heap_idx = 1;
309     unsigned int idx;
310
311     if (heap->entries[heap_idx].version <= version) return;
312
313     idx = heap->entries[heap_idx].idx;
314     apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
315     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
316
317     while (stack_idx >= 0)
318     {
319         /* Note that we fall through to the next case statement. */
320         switch(stack[stack_idx])
321         {
322             case HEAP_NODE_TRAVERSE_LEFT:
323             {
324                 unsigned int left_idx = heap_idx << 1;
325                 if (left_idx < heap->size && heap->entries[left_idx].version > version)
326                 {
327                     heap_idx = left_idx;
328                     idx = heap->entries[heap_idx].idx;
329                     apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
330
331                     stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
332                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
333                     break;
334                 }
335             }
336
337             case HEAP_NODE_TRAVERSE_RIGHT:
338             {
339                 unsigned int right_idx = (heap_idx << 1) + 1;
340                 if (right_idx < heap->size && heap->entries[right_idx].version > version)
341                 {
342                     heap_idx = right_idx;
343                     idx = heap->entries[heap_idx].idx;
344                     apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
345
346                     stack[stack_idx++] = HEAP_NODE_POP;
347                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
348                     break;
349                 }
350             }
351
352             case HEAP_NODE_POP:
353             {
354                 heap_idx >>= 1;
355                 --stack_idx;
356                 break;
357             }
358         }
359     }
360     checkGLcall("walk_constant_heap_clamped()");
361 }
362
363 /* Loads floating point constants (aka uniforms) into the currently set GLSL program. */
364 static void shader_glsl_load_constantsF(IWineD3DBaseShaderImpl *This, const WineD3D_GL_Info *gl_info,
365         const float *constants, const GLhandleARB *constant_locations, const struct constant_heap *heap,
366         unsigned char *stack, UINT version)
367 {
368     const local_constant *lconst;
369
370     /* 1.X pshaders have the constants clamped to [-1;1] implicitly. */
371     if (WINED3DSHADER_VERSION_MAJOR(This->baseShader.reg_maps.shader_version) == 1
372             && shader_is_pshader_version(This->baseShader.reg_maps.shader_version))
373         walk_constant_heap_clamped(gl_info, constants, constant_locations, heap, stack, version);
374     else
375         walk_constant_heap(gl_info, constants, constant_locations, heap, stack, version);
376
377     if (!This->baseShader.load_local_constsF)
378     {
379         TRACE("No need to load local float constants for this shader\n");
380         return;
381     }
382
383     /* Immediate constants are clamped to [-1;1] at shader creation time if needed */
384     LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry)
385     {
386         GLhandleARB location = constant_locations[lconst->idx];
387         /* We found this uniform name in the program - go ahead and send the data */
388         if (location != -1) GL_EXTCALL(glUniform4fvARB(location, 1, (const GLfloat *)lconst->value));
389     }
390     checkGLcall("glUniform4fvARB()");
391 }
392
393 /* Loads integer constants (aka uniforms) into the currently set GLSL program. */
394 static void shader_glsl_load_constantsI(IWineD3DBaseShaderImpl *This, const WineD3D_GL_Info *gl_info,
395         const GLhandleARB locations[MAX_CONST_I], const int *constants, WORD constants_set)
396 {
397     unsigned int i;
398     struct list* ptr;
399
400     for (i = 0; constants_set; constants_set >>= 1, ++i)
401     {
402         if (!(constants_set & 1)) continue;
403
404         TRACE_(d3d_constants)("Loading constants %u: %i, %i, %i, %i\n",
405                 i, constants[i*4], constants[i*4+1], constants[i*4+2], constants[i*4+3]);
406
407         /* We found this uniform name in the program - go ahead and send the data */
408         GL_EXTCALL(glUniform4ivARB(locations[i], 1, &constants[i*4]));
409         checkGLcall("glUniform4ivARB");
410     }
411
412     /* Load immediate constants */
413     ptr = list_head(&This->baseShader.constantsI);
414     while (ptr) {
415         const struct local_constant *lconst = LIST_ENTRY(ptr, const struct local_constant, entry);
416         unsigned int idx = lconst->idx;
417         const GLint *values = (const GLint *)lconst->value;
418
419         TRACE_(d3d_constants)("Loading local constants %i: %i, %i, %i, %i\n", idx,
420             values[0], values[1], values[2], values[3]);
421
422         /* We found this uniform name in the program - go ahead and send the data */
423         GL_EXTCALL(glUniform4ivARB(locations[idx], 1, values));
424         checkGLcall("glUniform4ivARB");
425         ptr = list_next(&This->baseShader.constantsI, ptr);
426     }
427 }
428
429 /* Loads boolean constants (aka uniforms) into the currently set GLSL program. */
430 static void shader_glsl_load_constantsB(IWineD3DBaseShaderImpl *This, const WineD3D_GL_Info *gl_info,
431         GLhandleARB programId, const BOOL *constants, WORD constants_set)
432 {
433     GLhandleARB tmp_loc;
434     unsigned int i;
435     char tmp_name[8];
436     char is_pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version);
437     const char* prefix = is_pshader? "PB":"VB";
438     struct list* ptr;
439
440     /* TODO: Benchmark and see if it would be beneficial to store the
441      * locations of the constants to avoid looking up each time */
442     for (i = 0; constants_set; constants_set >>= 1, ++i)
443     {
444         if (!(constants_set & 1)) continue;
445
446         TRACE_(d3d_constants)("Loading constants %i: %i;\n", i, constants[i]);
447
448         /* TODO: Benchmark and see if it would be beneficial to store the
449          * locations of the constants to avoid looking up each time */
450         snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, i);
451         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
452         if (tmp_loc != -1)
453         {
454             /* We found this uniform name in the program - go ahead and send the data */
455             GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, &constants[i]));
456             checkGLcall("glUniform1ivARB");
457         }
458     }
459
460     /* Load immediate constants */
461     ptr = list_head(&This->baseShader.constantsB);
462     while (ptr) {
463         const struct local_constant *lconst = LIST_ENTRY(ptr, const struct local_constant, entry);
464         unsigned int idx = lconst->idx;
465         const GLint *values = (const GLint *)lconst->value;
466
467         TRACE_(d3d_constants)("Loading local constants %i: %i\n", idx, values[0]);
468
469         snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, idx);
470         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
471         if (tmp_loc != -1) {
472             /* We found this uniform name in the program - go ahead and send the data */
473             GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, values));
474             checkGLcall("glUniform1ivARB");
475         }
476         ptr = list_next(&This->baseShader.constantsB, ptr);
477     }
478 }
479
480 static void reset_program_constant_version(void *value, void *context)
481 {
482     struct glsl_shader_prog_link *entry = value;
483     entry->constant_version = 0;
484 }
485
486 /**
487  * Loads the app-supplied constants into the currently set GLSL program.
488  */
489 static void shader_glsl_load_constants(
490     IWineD3DDevice* device,
491     char usePixelShader,
492     char useVertexShader) {
493    
494     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) device;
495     struct shader_glsl_priv *priv = deviceImpl->shader_priv;
496     IWineD3DStateBlockImpl* stateBlock = deviceImpl->stateBlock;
497     const WineD3D_GL_Info *gl_info = &deviceImpl->adapter->gl_info;
498
499     GLhandleARB programId;
500     struct glsl_shader_prog_link *prog = priv->glsl_program;
501     UINT constant_version;
502     int i;
503
504     if (!prog) {
505         /* No GLSL program set - nothing to do. */
506         return;
507     }
508     programId = prog->programId;
509     constant_version = prog->constant_version;
510
511     if (useVertexShader) {
512         IWineD3DBaseShaderImpl* vshader = (IWineD3DBaseShaderImpl*) stateBlock->vertexShader;
513
514         /* Load DirectX 9 float constants/uniforms for vertex shader */
515         shader_glsl_load_constantsF(vshader, gl_info, stateBlock->vertexShaderConstantF,
516                 prog->vuniformF_locations, &priv->vconst_heap, priv->stack, constant_version);
517
518         /* Load DirectX 9 integer constants/uniforms for vertex shader */
519         if(vshader->baseShader.uses_int_consts) {
520             shader_glsl_load_constantsI(vshader, gl_info, prog->vuniformI_locations,
521                     stateBlock->vertexShaderConstantI, stateBlock->changed.vertexShaderConstantsI);
522         }
523
524         /* Load DirectX 9 boolean constants/uniforms for vertex shader */
525         if(vshader->baseShader.uses_bool_consts) {
526             shader_glsl_load_constantsB(vshader, gl_info, programId,
527                     stateBlock->vertexShaderConstantB, stateBlock->changed.vertexShaderConstantsB);
528         }
529
530         /* Upload the position fixup params */
531         GL_EXTCALL(glUniform4fvARB(prog->posFixup_location, 1, &deviceImpl->posFixup[0]));
532         checkGLcall("glUniform4fvARB");
533     }
534
535     if (usePixelShader) {
536
537         IWineD3DBaseShaderImpl* pshader = (IWineD3DBaseShaderImpl*) stateBlock->pixelShader;
538
539         /* Load DirectX 9 float constants/uniforms for pixel shader */
540         shader_glsl_load_constantsF(pshader, gl_info, stateBlock->pixelShaderConstantF,
541                 prog->puniformF_locations, &priv->pconst_heap, priv->stack, constant_version);
542
543         /* Load DirectX 9 integer constants/uniforms for pixel shader */
544         if(pshader->baseShader.uses_int_consts) {
545             shader_glsl_load_constantsI(pshader, gl_info, prog->puniformI_locations,
546                     stateBlock->pixelShaderConstantI, stateBlock->changed.pixelShaderConstantsI);
547         }
548
549         /* Load DirectX 9 boolean constants/uniforms for pixel shader */
550         if(pshader->baseShader.uses_bool_consts) {
551             shader_glsl_load_constantsB(pshader, gl_info, programId,
552                     stateBlock->pixelShaderConstantB, stateBlock->changed.pixelShaderConstantsB);
553         }
554
555         /* Upload the environment bump map matrix if needed. The needsbumpmat member specifies the texture stage to load the matrix from.
556          * It can't be 0 for a valid texbem instruction.
557          */
558         for(i = 0; i < ((IWineD3DPixelShaderImpl *) pshader)->numbumpenvmatconsts; i++) {
559             IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) pshader;
560             int stage = ps->luminanceconst[i].texunit;
561
562             const float *data = (const float *)&stateBlock->textureState[(int)ps->bumpenvmatconst[i].texunit][WINED3DTSS_BUMPENVMAT00];
563             GL_EXTCALL(glUniformMatrix2fvARB(prog->bumpenvmat_location[i], 1, 0, data));
564             checkGLcall("glUniformMatrix2fvARB");
565
566             /* texbeml needs the luminance scale and offset too. If texbeml is used, needsbumpmat
567              * is set too, so we can check that in the needsbumpmat check
568              */
569             if(ps->baseShader.reg_maps.luminanceparams[stage]) {
570                 const GLfloat *scale = (const GLfloat *)&stateBlock->textureState[stage][WINED3DTSS_BUMPENVLSCALE];
571                 const GLfloat *offset = (const GLfloat *)&stateBlock->textureState[stage][WINED3DTSS_BUMPENVLOFFSET];
572
573                 GL_EXTCALL(glUniform1fvARB(prog->luminancescale_location[i], 1, scale));
574                 checkGLcall("glUniform1fvARB");
575                 GL_EXTCALL(glUniform1fvARB(prog->luminanceoffset_location[i], 1, offset));
576                 checkGLcall("glUniform1fvARB");
577             }
578         }
579
580         if(((IWineD3DPixelShaderImpl *) pshader)->vpos_uniform) {
581             float correction_params[4];
582             if(deviceImpl->render_offscreen) {
583                 correction_params[0] = 0.0;
584                 correction_params[1] = 1.0;
585             } else {
586                 /* position is window relative, not viewport relative */
587                 correction_params[0] = ((IWineD3DSurfaceImpl *) deviceImpl->render_targets[0])->currentDesc.Height;
588                 correction_params[1] = -1.0;
589             }
590             GL_EXTCALL(glUniform4fvARB(prog->ycorrection_location, 1, correction_params));
591         }
592     }
593
594     if (priv->next_constant_version == UINT_MAX)
595     {
596         TRACE("Max constant version reached, resetting to 0.\n");
597         hash_table_for_each_entry(priv->glsl_program_lookup, reset_program_constant_version, NULL);
598         priv->next_constant_version = 1;
599     }
600     else
601     {
602         prog->constant_version = priv->next_constant_version++;
603     }
604 }
605
606 static inline void update_heap_entry(struct constant_heap *heap, unsigned int idx,
607         unsigned int heap_idx, DWORD new_version)
608 {
609     struct constant_entry *entries = heap->entries;
610     unsigned int *positions = heap->positions;
611     unsigned int parent_idx;
612
613     while (heap_idx > 1)
614     {
615         parent_idx = heap_idx >> 1;
616
617         if (new_version <= entries[parent_idx].version) break;
618
619         entries[heap_idx] = entries[parent_idx];
620         positions[entries[parent_idx].idx] = heap_idx;
621         heap_idx = parent_idx;
622     }
623
624     entries[heap_idx].version = new_version;
625     entries[heap_idx].idx = idx;
626     positions[idx] = heap_idx;
627 }
628
629 static void shader_glsl_update_float_vertex_constants(IWineD3DDevice *iface, UINT start, UINT count)
630 {
631     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
632     struct shader_glsl_priv *priv = This->shader_priv;
633     struct constant_heap *heap = &priv->vconst_heap;
634     UINT i;
635
636     for (i = start; i < count + start; ++i)
637     {
638         if (!This->stateBlock->changed.vertexShaderConstantsF[i])
639             update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
640         else
641             update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
642     }
643 }
644
645 static void shader_glsl_update_float_pixel_constants(IWineD3DDevice *iface, UINT start, UINT count)
646 {
647     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
648     struct shader_glsl_priv *priv = This->shader_priv;
649     struct constant_heap *heap = &priv->pconst_heap;
650     UINT i;
651
652     for (i = start; i < count + start; ++i)
653     {
654         if (!This->stateBlock->changed.pixelShaderConstantsF[i])
655             update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
656         else
657             update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
658     }
659 }
660
661 /** Generate the variable & register declarations for the GLSL output target */
662 static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const shader_reg_maps *reg_maps,
663         SHADER_BUFFER *buffer, const WineD3D_GL_Info *gl_info,
664         const struct ps_compile_args *ps_args)
665 {
666     IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
667     IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) This->baseShader.device;
668     DWORD shader_version = reg_maps->shader_version;
669     unsigned int i, extra_constants_needed = 0;
670     const local_constant *lconst;
671
672     /* There are some minor differences between pixel and vertex shaders */
673     char pshader = shader_is_pshader_version(shader_version);
674     char prefix = pshader ? 'P' : 'V';
675
676     /* Prototype the subroutines */
677     for (i = 0; i < This->baseShader.limits.label; i++) {
678         if (reg_maps->labels[i])
679             shader_addline(buffer, "void subroutine%u();\n", i);
680     }
681
682     /* Declare the constants (aka uniforms) */
683     if (This->baseShader.limits.constant_float > 0) {
684         unsigned max_constantsF = min(This->baseShader.limits.constant_float, 
685                 (pshader ? GL_LIMITS(pshader_constantsF) : GL_LIMITS(vshader_constantsF)));
686         shader_addline(buffer, "uniform vec4 %cC[%u];\n", prefix, max_constantsF);
687     }
688
689     if (This->baseShader.limits.constant_int > 0)
690         shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, This->baseShader.limits.constant_int);
691
692     if (This->baseShader.limits.constant_bool > 0)
693         shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, This->baseShader.limits.constant_bool);
694
695     if(!pshader) {
696         shader_addline(buffer, "uniform vec4 posFixup;\n");
697         /* Predeclaration; This function is added at link time based on the pixel shader.
698          * VS 3.0 shaders have an array OUT[] the shader writes to, earlier versions don't have
699          * that. We know the input to the reorder function at vertex shader compile time, so
700          * we can deal with that. The reorder function for a 1.x and 2.x vertex shader can just
701          * read gl_FrontColor. The output depends on the pixel shader. The reorder function for a
702          * 1.x and 2.x pshader or for fixed function will write gl_FrontColor, and for a 3.0 shader
703          * it will write to the varying array. Here we depend on the shader optimizer on sorting that
704          * out. The nvidia driver only does that if the parameter is inout instead of out, hence the
705          * inout.
706          */
707         if (shader_version >= WINED3DVS_VERSION(3, 0))
708         {
709             shader_addline(buffer, "void order_ps_input(in vec4[%u]);\n", MAX_REG_OUTPUT);
710         } else {
711             shader_addline(buffer, "void order_ps_input();\n");
712         }
713     } else {
714         IWineD3DPixelShaderImpl *ps_impl = (IWineD3DPixelShaderImpl *) This;
715
716         ps_impl->numbumpenvmatconsts = 0;
717         for(i = 0; i < (sizeof(reg_maps->bumpmat) / sizeof(reg_maps->bumpmat[0])); i++) {
718             if(!reg_maps->bumpmat[i]) {
719                 continue;
720             }
721
722             ps_impl->bumpenvmatconst[(int) ps_impl->numbumpenvmatconsts].texunit = i;
723             shader_addline(buffer, "uniform mat2 bumpenvmat%d;\n", i);
724
725             if(reg_maps->luminanceparams) {
726                 ps_impl->luminanceconst[(int) ps_impl->numbumpenvmatconsts].texunit = i;
727                 shader_addline(buffer, "uniform float luminancescale%d;\n", i);
728                 shader_addline(buffer, "uniform float luminanceoffset%d;\n", i);
729                 extra_constants_needed++;
730             } else {
731                 ps_impl->luminanceconst[(int) ps_impl->numbumpenvmatconsts].texunit = -1;
732             }
733
734             extra_constants_needed++;
735             ps_impl->numbumpenvmatconsts++;
736         }
737
738         if(ps_args->srgb_correction) {
739             shader_addline(buffer, "const vec4 srgb_mul_low = vec4(%f, %f, %f, %f);\n",
740                             srgb_mul_low, srgb_mul_low, srgb_mul_low, srgb_mul_low);
741             shader_addline(buffer, "const vec4 srgb_comparison = vec4(%f, %f, %f, %f);\n",
742                             srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp);
743         }
744         if(reg_maps->vpos || reg_maps->usesdsy) {
745             if(This->baseShader.limits.constant_float + extra_constants_needed + 1 < GL_LIMITS(pshader_constantsF)) {
746                 shader_addline(buffer, "uniform vec4 ycorrection;\n");
747                 ((IWineD3DPixelShaderImpl *) This)->vpos_uniform = 1;
748                 extra_constants_needed++;
749             } else {
750                 /* This happens because we do not have proper tracking of the constant registers that are
751                  * actually used, only the max limit of the shader version
752                  */
753                 FIXME("Cannot find a free uniform for vpos correction params\n");
754                 shader_addline(buffer, "const vec4 ycorrection = vec4(%f, %f, 0.0, 0.0);\n",
755                                device->render_offscreen ? 0.0 : ((IWineD3DSurfaceImpl *) device->render_targets[0])->currentDesc.Height,
756                                device->render_offscreen ? 1.0 : -1.0);
757             }
758             shader_addline(buffer, "vec4 vpos;\n");
759         }
760     }
761
762     /* Declare texture samplers */ 
763     for (i = 0; i < This->baseShader.limits.sampler; i++) {
764         if (reg_maps->samplers[i]) {
765
766             DWORD stype = reg_maps->samplers[i] & WINED3DSP_TEXTURETYPE_MASK;
767             switch (stype) {
768
769                 case WINED3DSTT_1D:
770                     shader_addline(buffer, "uniform sampler1D %csampler%u;\n", prefix, i);
771                     break;
772                 case WINED3DSTT_2D:
773                     if(device->stateBlock->textures[i] &&
774                        IWineD3DBaseTexture_GetTextureDimensions(device->stateBlock->textures[i]) == GL_TEXTURE_RECTANGLE_ARB) {
775                         shader_addline(buffer, "uniform sampler2DRect %csampler%u;\n", prefix, i);
776                     } else {
777                         shader_addline(buffer, "uniform sampler2D %csampler%u;\n", prefix, i);
778                     }
779                     break;
780                 case WINED3DSTT_CUBE:
781                     shader_addline(buffer, "uniform samplerCube %csampler%u;\n", prefix, i);
782                     break;
783                 case WINED3DSTT_VOLUME:
784                     shader_addline(buffer, "uniform sampler3D %csampler%u;\n", prefix, i);
785                     break;
786                 default:
787                     shader_addline(buffer, "uniform unsupported_sampler %csampler%u;\n", prefix, i);
788                     FIXME("Unrecognized sampler type: %#x\n", stype);
789                     break;
790             }
791         }
792     }
793     
794     /* Declare address variables */
795     for (i = 0; i < This->baseShader.limits.address; i++) {
796         if (reg_maps->address[i])
797             shader_addline(buffer, "ivec4 A%d;\n", i);
798     }
799
800     /* Declare texture coordinate temporaries and initialize them */
801     for (i = 0; i < This->baseShader.limits.texcoord; i++) {
802         if (reg_maps->texcoord[i]) 
803             shader_addline(buffer, "vec4 T%u = gl_TexCoord[%u];\n", i, i);
804     }
805
806     /* Declare input register varyings. Only pixel shader, vertex shaders have that declared in the
807      * helper function shader that is linked in at link time
808      */
809     if (pshader && shader_version >= WINED3DPS_VERSION(3, 0))
810     {
811         if (use_vs(device->stateBlock))
812         {
813             shader_addline(buffer, "varying vec4 IN[%u];\n", GL_LIMITS(glsl_varyings) / 4);
814         } else {
815             /* TODO: Write a replacement shader for the fixed function vertex pipeline, so this isn't needed.
816              * For fixed function vertex processing + 3.0 pixel shader we need a separate function in the
817              * pixel shader that reads the fixed function color into the packed input registers.
818              */
819             shader_addline(buffer, "vec4 IN[%u];\n", GL_LIMITS(glsl_varyings) / 4);
820         }
821     }
822
823     /* Declare output register temporaries */
824     if(This->baseShader.limits.packed_output) {
825         shader_addline(buffer, "vec4 OUT[%u];\n", This->baseShader.limits.packed_output);
826     }
827
828     /* Declare temporary variables */
829     for(i = 0; i < This->baseShader.limits.temporary; i++) {
830         if (reg_maps->temporary[i])
831             shader_addline(buffer, "vec4 R%u;\n", i);
832     }
833
834     /* Declare attributes */
835     for (i = 0; i < This->baseShader.limits.attributes; i++) {
836         if (reg_maps->attributes[i])
837             shader_addline(buffer, "attribute vec4 attrib%i;\n", i);
838     }
839
840     /* Declare loop registers aLx */
841     for (i = 0; i < reg_maps->loop_depth; i++) {
842         shader_addline(buffer, "int aL%u;\n", i);
843         shader_addline(buffer, "int tmpInt%u;\n", i);
844     }
845
846     /* Temporary variables for matrix operations */
847     shader_addline(buffer, "vec4 tmp0;\n");
848     shader_addline(buffer, "vec4 tmp1;\n");
849
850     /* Local constants use a different name so they can be loaded once at shader link time
851      * They can't be hardcoded into the shader text via LC = {x, y, z, w}; because the
852      * float -> string conversion can cause precision loss.
853      */
854     if(!This->baseShader.load_local_constsF) {
855         LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
856             shader_addline(buffer, "uniform vec4 %cLC%u;\n", prefix, lconst->idx);
857         }
858     }
859
860     /* Start the main program */
861     shader_addline(buffer, "void main() {\n");
862     if(pshader && reg_maps->vpos) {
863         /* DirectX apps expect integer values, while OpenGL drivers add approximately 0.5. This causes
864          * off-by-one problems as spotted by the vPos d3d9 visual test. Unfortunately the ATI cards do
865          * not add exactly 0.5, but rather something like 0.49999999 or 0.50000001, which still causes
866          * precision troubles when we just substract 0.5.
867          *
868          * To deal with that just floor() the position. This will eliminate the fraction on all cards.
869          *
870          * TODO: Test how that behaves with multisampling once we can enable multisampling in winex11.
871          *
872          * An advantage of floor is that it works even if the driver doesn't add 1/2. It is somewhat
873          * questionable if 1.5, 2.5, ... are the proper values to return in gl_FragCoord, even though
874          * coordinates specify the pixel centers instead of the pixel corners. This code will behave
875          * correctly on drivers that returns integer values.
876          */
877         shader_addline(buffer, "vpos = floor(vec4(0, ycorrection[0], 0, 0) + gl_FragCoord * vec4(1, ycorrection[1], 1, 1));\n");
878     }
879 }
880
881 /*****************************************************************************
882  * Functions to generate GLSL strings from DirectX Shader bytecode begin here.
883  *
884  * For more information, see http://wiki.winehq.org/DirectX-Shaders
885  ****************************************************************************/
886
887 /* Prototypes */
888 static void shader_glsl_add_src_param(const SHADER_OPCODE_ARG *arg, const DWORD param,
889         const DWORD addr_token, DWORD mask, glsl_src_param_t *src_param);
890
/** GLSL multiplier prefixes for the destination shift modifier, indexed by
 * the 4 bit shift value of the destination token. Entries that do not emit a
 * multiplier are empty strings. */
static const char * const shift_glsl_tab[] = {
    /*  0 (none), 1 (x2), 2 (x4), 3 (x8), 4 (x16), 5 (x32) */
    "", "2.0 * ", "4.0 * ", "8.0 * ", "16.0 * ", "32.0 * ",
    /*  6 (x64), 7 (x128), 8 (d256), 9 (d128), 10 (d64), 11 (d32): nothing emitted */
    "", "", "", "", "", "",
    /* 12 (d16), 13 (d8), 14 (d4), 15 (d2) */
    "0.0625 * ", "0.125 * ", "0.25 * ", "0.5 * "
};
910
911 /* Generate a GLSL parameter that does the input modifier computation and return the input register/mask to use */
912 static void shader_glsl_gen_modifier (
913     const DWORD instr,
914     const char *in_reg,
915     const char *in_regswizzle,
916     char *out_str) {
917
918     out_str[0] = 0;
919     
920     if (instr == WINED3DSIO_TEXKILL)
921         return;
922
923     switch (instr & WINED3DSP_SRCMOD_MASK) {
924     case WINED3DSPSM_DZ: /* Need to handle this in the instructions itself (texld & texcrd). */
925     case WINED3DSPSM_DW:
926     case WINED3DSPSM_NONE:
927         sprintf(out_str, "%s%s", in_reg, in_regswizzle);
928         break;
929     case WINED3DSPSM_NEG:
930         sprintf(out_str, "-%s%s", in_reg, in_regswizzle);
931         break;
932     case WINED3DSPSM_NOT:
933         sprintf(out_str, "!%s%s", in_reg, in_regswizzle);
934         break;
935     case WINED3DSPSM_BIAS:
936         sprintf(out_str, "(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
937         break;
938     case WINED3DSPSM_BIASNEG:
939         sprintf(out_str, "-(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
940         break;
941     case WINED3DSPSM_SIGN:
942         sprintf(out_str, "(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
943         break;
944     case WINED3DSPSM_SIGNNEG:
945         sprintf(out_str, "-(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
946         break;
947     case WINED3DSPSM_COMP:
948         sprintf(out_str, "(1.0 - %s%s)", in_reg, in_regswizzle);
949         break;
950     case WINED3DSPSM_X2:
951         sprintf(out_str, "(2.0 * %s%s)", in_reg, in_regswizzle);
952         break;
953     case WINED3DSPSM_X2NEG:
954         sprintf(out_str, "-(2.0 * %s%s)", in_reg, in_regswizzle);
955         break;
956     case WINED3DSPSM_ABS:
957         sprintf(out_str, "abs(%s%s)", in_reg, in_regswizzle);
958         break;
959     case WINED3DSPSM_ABSNEG:
960         sprintf(out_str, "-abs(%s%s)", in_reg, in_regswizzle);
961         break;
962     default:
963         FIXME("Unhandled modifier %u\n", (instr & WINED3DSP_SRCMOD_MASK));
964         sprintf(out_str, "%s%s", in_reg, in_regswizzle);
965     }
966 }
967
968 /** Writes the GLSL variable name that corresponds to the register that the
969  * DX opcode parameter is trying to access */
970 static void shader_glsl_get_register_name(const DWORD param, const DWORD addr_token,
971         char *regstr, BOOL *is_color, const SHADER_OPCODE_ARG *arg)
972 {
973     /* oPos, oFog and oPts in D3D */
974     static const char * const hwrastout_reg_names[] = { "gl_Position", "gl_FogFragCoord", "gl_PointSize" };
975
976     DWORD reg = param & WINED3DSP_REGNUM_MASK;
977     DWORD regtype = shader_get_regtype(param);
978     IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) arg->shader;
979     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
980     const WineD3D_GL_Info* gl_info = &deviceImpl->adapter->gl_info;
981     DWORD shader_version = This->baseShader.reg_maps.shader_version;
982     char pshader = shader_is_pshader_version(shader_version);
983     char tmpStr[150];
984
985     *is_color = FALSE;   
986  
987     switch (regtype) {
988     case WINED3DSPR_TEMP:
989         sprintf(tmpStr, "R%u", reg);
990     break;
991     case WINED3DSPR_INPUT:
992         if (pshader) {
993             /* Pixel shaders >= 3.0 */
994             if (WINED3DSHADER_VERSION_MAJOR(shader_version) >= 3)
995             {
996                 DWORD in_count = GL_LIMITS(glsl_varyings) / 4;
997
998                 if (param & WINED3DSHADER_ADDRMODE_RELATIVE) {
999                     glsl_src_param_t rel_param;
1000                     shader_glsl_add_src_param(arg, addr_token, 0, WINED3DSP_WRITEMASK_0, &rel_param);
1001
1002                     /* Removing a + 0 would be an obvious optimization, but macos doesn't see the NOP
1003                      * operation there
1004                      */
1005                     if(((IWineD3DPixelShaderImpl *) This)->input_reg_map[reg]) {
1006                         if (((IWineD3DPixelShaderImpl *)This)->declared_in_count > in_count) {
1007                             sprintf(tmpStr, "((%s + %u) > %d ? (%s + %u) > %d ? gl_SecondaryColor : gl_Color : IN[%s + %u])",
1008                                     rel_param.param_str, ((IWineD3DPixelShaderImpl *)This)->input_reg_map[reg], in_count - 1,
1009                                     rel_param.param_str, ((IWineD3DPixelShaderImpl *)This)->input_reg_map[reg], in_count,
1010                                     rel_param.param_str, ((IWineD3DPixelShaderImpl *)This)->input_reg_map[reg]);
1011                         } else {
1012                             sprintf(tmpStr, "IN[%s + %u]", rel_param.param_str, ((IWineD3DPixelShaderImpl *)This)->input_reg_map[reg]);
1013                         }
1014                     } else {
1015                         if (((IWineD3DPixelShaderImpl *)This)->declared_in_count > in_count) {
1016                             sprintf(tmpStr, "((%s) > %d ? (%s) > %d ? gl_SecondaryColor : gl_Color : IN[%s])",
1017                                     rel_param.param_str, in_count - 1,
1018                                     rel_param.param_str, in_count,
1019                                     rel_param.param_str);
1020                         } else {
1021                             sprintf(tmpStr, "IN[%s]", rel_param.param_str);
1022                         }
1023                     }
1024                 } else {
1025                     DWORD idx = ((IWineD3DPixelShaderImpl *) This)->input_reg_map[reg];
1026                     if (idx == in_count) {
1027                         sprintf(tmpStr, "gl_Color");
1028                     } else if (idx == in_count + 1) {
1029                         sprintf(tmpStr, "gl_SecondaryColor");
1030                     } else {
1031                         sprintf(tmpStr, "IN[%u]", idx);
1032                     }
1033                 }
1034             } else {
1035                 if (reg==0)
1036                     strcpy(tmpStr, "gl_Color");
1037                 else
1038                     strcpy(tmpStr, "gl_SecondaryColor");
1039             }
1040         } else {
1041             if (((IWineD3DVertexShaderImpl *)This)->cur_args->swizzle_map & (1 << reg)) *is_color = TRUE;
1042             sprintf(tmpStr, "attrib%u", reg);
1043         } 
1044         break;
1045     case WINED3DSPR_CONST:
1046     {
1047         const char prefix = pshader? 'P':'V';
1048
1049         /* Relative addressing */
1050         if (param & WINED3DSHADER_ADDRMODE_RELATIVE) {
1051
1052            /* Relative addressing on shaders 2.0+ have a relative address token, 
1053             * prior to that, it was hard-coded as "A0.x" because there's only 1 register */
1054            if (WINED3DSHADER_VERSION_MAJOR(shader_version) >= 2)
1055            {
1056                glsl_src_param_t rel_param;
1057                shader_glsl_add_src_param(arg, addr_token, 0, WINED3DSP_WRITEMASK_0, &rel_param);
1058                if(reg) {
1059                    sprintf(tmpStr, "%cC[%s + %u]", prefix, rel_param.param_str, reg);
1060                } else {
1061                    sprintf(tmpStr, "%cC[%s]", prefix, rel_param.param_str);
1062                }
1063            } else {
1064                if(reg) {
1065                    sprintf(tmpStr, "%cC[A0.x + %u]", prefix, reg);
1066                } else {
1067                    sprintf(tmpStr, "%cC[A0.x]", prefix);
1068                }
1069            }
1070
1071         } else {
1072             if(shader_constant_is_local(This, reg)) {
1073                 sprintf(tmpStr, "%cLC%u", prefix, reg);
1074             } else {
1075                 sprintf(tmpStr, "%cC[%u]", prefix, reg);
1076             }
1077         }
1078
1079         break;
1080     }
1081     case WINED3DSPR_CONSTINT:
1082         if (pshader)
1083             sprintf(tmpStr, "PI[%u]", reg);
1084         else
1085             sprintf(tmpStr, "VI[%u]", reg);
1086         break;
1087     case WINED3DSPR_CONSTBOOL:
1088         if (pshader)
1089             sprintf(tmpStr, "PB[%u]", reg);
1090         else
1091             sprintf(tmpStr, "VB[%u]", reg);
1092         break;
1093     case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
1094         if (pshader) {
1095             sprintf(tmpStr, "T%u", reg);
1096         } else {
1097             sprintf(tmpStr, "A%u", reg);
1098         }
1099     break;
1100     case WINED3DSPR_LOOP:
1101         sprintf(tmpStr, "aL%u", This->baseShader.cur_loop_regno - 1);
1102     break;
1103     case WINED3DSPR_SAMPLER:
1104         if (pshader)
1105             sprintf(tmpStr, "Psampler%u", reg);
1106         else
1107             sprintf(tmpStr, "Vsampler%u", reg);
1108     break;
1109     case WINED3DSPR_COLOROUT:
1110         if (reg >= GL_LIMITS(buffers)) {
1111             WARN("Write to render target %u, only %d supported\n", reg, 4);
1112         }
1113         if (GL_SUPPORT(ARB_DRAW_BUFFERS)) {
1114             sprintf(tmpStr, "gl_FragData[%u]", reg);
1115         } else { /* On older cards with GLSL support like the GeforceFX there's only one buffer. */
1116             sprintf(tmpStr, "gl_FragColor");
1117         }
1118     break;
1119     case WINED3DSPR_RASTOUT:
1120         sprintf(tmpStr, "%s", hwrastout_reg_names[reg]);
1121     break;
1122     case WINED3DSPR_DEPTHOUT:
1123         sprintf(tmpStr, "gl_FragDepth");
1124     break;
1125     case WINED3DSPR_ATTROUT:
1126         if (reg == 0) {
1127             sprintf(tmpStr, "gl_FrontColor");
1128         } else {
1129             sprintf(tmpStr, "gl_FrontSecondaryColor");
1130         }
1131     break;
1132     case WINED3DSPR_TEXCRDOUT:
1133         /* Vertex shaders >= 3.0: WINED3DSPR_OUTPUT */
1134         if (WINED3DSHADER_VERSION_MAJOR(shader_version) >= 3) sprintf(tmpStr, "OUT[%u]", reg);
1135         else sprintf(tmpStr, "gl_TexCoord[%u]", reg);
1136     break;
1137     case WINED3DSPR_MISCTYPE:
1138         if (reg == 0) {
1139             /* vPos */
1140             sprintf(tmpStr, "vpos");
1141         } else if (reg == 1){
1142             /* Note that gl_FrontFacing is a bool, while vFace is
1143              * a float for which the sign determines front/back
1144              */
1145             sprintf(tmpStr, "(gl_FrontFacing ? 1.0 : -1.0)");
1146         } else {
1147             FIXME("Unhandled misctype register %d\n", reg);
1148             sprintf(tmpStr, "unrecognized_register");
1149         }
1150         break;
1151     default:
1152         FIXME("Unhandled register name Type(%d)\n", regtype);
1153         sprintf(tmpStr, "unrecognized_register");
1154     break;
1155     }
1156
1157     strcat(regstr, tmpStr);
1158 }
1159
1160 /* Get the GLSL write mask for the destination register */
1161 static DWORD shader_glsl_get_write_mask(const DWORD param, char *write_mask) {
1162     char *ptr = write_mask;
1163     DWORD mask = param & WINED3DSP_WRITEMASK_ALL;
1164
1165     if (shader_is_scalar(param)) {
1166         mask = WINED3DSP_WRITEMASK_0;
1167     } else {
1168         *ptr++ = '.';
1169         if (param & WINED3DSP_WRITEMASK_0) *ptr++ = 'x';
1170         if (param & WINED3DSP_WRITEMASK_1) *ptr++ = 'y';
1171         if (param & WINED3DSP_WRITEMASK_2) *ptr++ = 'z';
1172         if (param & WINED3DSP_WRITEMASK_3) *ptr++ = 'w';
1173     }
1174
1175     *ptr = '\0';
1176
1177     return mask;
1178 }
1179
1180 static unsigned int shader_glsl_get_write_mask_size(DWORD write_mask) {
1181     unsigned int size = 0;
1182
1183     if (write_mask & WINED3DSP_WRITEMASK_0) ++size;
1184     if (write_mask & WINED3DSP_WRITEMASK_1) ++size;
1185     if (write_mask & WINED3DSP_WRITEMASK_2) ++size;
1186     if (write_mask & WINED3DSP_WRITEMASK_3) ++size;
1187
1188     return size;
1189 }
1190
1191 static void shader_glsl_get_swizzle(const DWORD param, BOOL fixup, DWORD mask, char *swizzle_str) {
1192     /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
1193      * but addressed as "rgba". To fix this we need to swap the register's x
1194      * and z components. */
1195     DWORD swizzle = (param & WINED3DSP_SWIZZLE_MASK) >> WINED3DSP_SWIZZLE_SHIFT;
1196     const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
1197     char *ptr = swizzle_str;
1198
1199     if (!shader_is_scalar(param)) {
1200         *ptr++ = '.';
1201         /* swizzle bits fields: wwzzyyxx */
1202         if (mask & WINED3DSP_WRITEMASK_0) *ptr++ = swizzle_chars[swizzle & 0x03];
1203         if (mask & WINED3DSP_WRITEMASK_1) *ptr++ = swizzle_chars[(swizzle >> 2) & 0x03];
1204         if (mask & WINED3DSP_WRITEMASK_2) *ptr++ = swizzle_chars[(swizzle >> 4) & 0x03];
1205         if (mask & WINED3DSP_WRITEMASK_3) *ptr++ = swizzle_chars[(swizzle >> 6) & 0x03];
1206     }
1207
1208     *ptr = '\0';
1209 }
1210
1211 /* From a given parameter token, generate the corresponding GLSL string.
1212  * Also, return the actual register name and swizzle in case the
1213  * caller needs this information as well. */
1214 static void shader_glsl_add_src_param(const SHADER_OPCODE_ARG *arg, const DWORD param,
1215         const DWORD addr_token, DWORD mask, glsl_src_param_t *src_param)
1216 {
1217     BOOL is_color = FALSE;
1218     char swizzle_str[6];
1219
1220     src_param->reg_name[0] = '\0';
1221     src_param->param_str[0] = '\0';
1222     swizzle_str[0] = '\0';
1223
1224     shader_glsl_get_register_name(param, addr_token, src_param->reg_name, &is_color, arg);
1225
1226     shader_glsl_get_swizzle(param, is_color, mask, swizzle_str);
1227     shader_glsl_gen_modifier(param, src_param->reg_name, swizzle_str, src_param->param_str);
1228 }
1229
1230 /* From a given parameter token, generate the corresponding GLSL string.
1231  * Also, return the actual register name and swizzle in case the
1232  * caller needs this information as well. */
1233 static DWORD shader_glsl_add_dst_param(const SHADER_OPCODE_ARG* arg, const DWORD param,
1234         const DWORD addr_token, glsl_dst_param_t *dst_param)
1235 {
1236     BOOL is_color = FALSE;
1237
1238     dst_param->mask_str[0] = '\0';
1239     dst_param->reg_name[0] = '\0';
1240
1241     shader_glsl_get_register_name(param, addr_token, dst_param->reg_name, &is_color, arg);
1242     return shader_glsl_get_write_mask(param, dst_param->mask_str);
1243 }
1244
1245 /* Append the destination part of the instruction to the buffer, return the effective write mask */
1246 static DWORD shader_glsl_append_dst_ext(SHADER_BUFFER *buffer, const SHADER_OPCODE_ARG *arg, const DWORD param)
1247 {
1248     glsl_dst_param_t dst_param;
1249     DWORD mask;
1250     int shift;
1251
1252     mask = shader_glsl_add_dst_param(arg, param, arg->dst_addr, &dst_param);
1253
1254     if(mask) {
1255         shift = (param & WINED3DSP_DSTSHIFT_MASK) >> WINED3DSP_DSTSHIFT_SHIFT;
1256         shader_addline(buffer, "%s%s = %s(", dst_param.reg_name, dst_param.mask_str, shift_glsl_tab[shift]);
1257     }
1258
1259     return mask;
1260 }
1261
1262 /* Append the destination part of the instruction to the buffer, return the effective write mask */
1263 static DWORD shader_glsl_append_dst(SHADER_BUFFER *buffer, const SHADER_OPCODE_ARG *arg)
1264 {
1265     return shader_glsl_append_dst_ext(buffer, arg, arg->dst);
1266 }
1267
1268 /** Process GLSL instruction modifiers */
1269 void shader_glsl_add_instruction_modifiers(const SHADER_OPCODE_ARG* arg)
1270 {
1271     DWORD mask = arg->dst & WINED3DSP_DSTMOD_MASK;
1272  
1273     if (arg->opcode->dst_token && mask != 0) {
1274         glsl_dst_param_t dst_param;
1275
1276         shader_glsl_add_dst_param(arg, arg->dst, 0, &dst_param);
1277
1278         if (mask & WINED3DSPDM_SATURATE) {
1279             /* _SAT means to clamp the value of the register to between 0 and 1 */
1280             shader_addline(arg->buffer, "%s%s = clamp(%s%s, 0.0, 1.0);\n", dst_param.reg_name,
1281                     dst_param.mask_str, dst_param.reg_name, dst_param.mask_str);
1282         }
1283         if (mask & WINED3DSPDM_MSAMPCENTROID) {
1284             FIXME("_centroid modifier not handled\n");
1285         }
1286         if (mask & WINED3DSPDM_PARTIALPRECISION) {
1287             /* MSDN says this modifier can be safely ignored, so that's what we'll do. */
1288         }
1289     }
1290 }
1291
1292 static inline const char* shader_get_comp_op(
1293     const DWORD opcode) {
1294
1295     DWORD op = (opcode & INST_CONTROLS_MASK) >> INST_CONTROLS_SHIFT;
1296     switch (op) {
1297         case COMPARISON_GT: return ">";
1298         case COMPARISON_EQ: return "==";
1299         case COMPARISON_GE: return ">=";
1300         case COMPARISON_LT: return "<";
1301         case COMPARISON_NE: return "!=";
1302         case COMPARISON_LE: return "<=";
1303         default:
1304             FIXME("Unrecognized comparison value: %u\n", op);
1305             return "(\?\?)";
1306     }
1307 }
1308
1309 static void shader_glsl_get_sample_function(DWORD sampler_type, DWORD flags, glsl_sample_function_t *sample_function)
1310 {
1311     BOOL projected = flags & WINED3D_GLSL_SAMPLE_PROJECTED;
1312     BOOL texrect = flags & WINED3D_GLSL_SAMPLE_RECT;
1313     BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD;
1314
1315     /* Note that there's no such thing as a projected cube texture. */
1316     switch(sampler_type) {
1317         case WINED3DSTT_1D:
1318             if(lod) {
1319                 sample_function->name = projected ? "texture1DProjLod" : "texture1DLod";
1320             } else {
1321                 sample_function->name = projected ? "texture1DProj" : "texture1D";
1322             }
1323             sample_function->coord_mask = WINED3DSP_WRITEMASK_0;
1324             break;
1325         case WINED3DSTT_2D:
1326             if(texrect) {
1327                 if(lod) {
1328                     sample_function->name = projected ? "texture2DRectProjLod" : "texture2DRectLod";
1329                 } else {
1330                     sample_function->name = projected ? "texture2DRectProj" : "texture2DRect";
1331                 }
1332             } else {
1333                 if(lod) {
1334                     sample_function->name = projected ? "texture2DProjLod" : "texture2DLod";
1335                 } else {
1336                     sample_function->name = projected ? "texture2DProj" : "texture2D";
1337                 }
1338             }
1339             sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1;
1340             break;
1341         case WINED3DSTT_CUBE:
1342             if(lod) {
1343                 sample_function->name = "textureCubeLod";
1344             } else {
1345                 sample_function->name = "textureCube";
1346             }
1347             sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1348             break;
1349         case WINED3DSTT_VOLUME:
1350             if(lod) {
1351                 sample_function->name = projected ? "texture3DProjLod" : "texture3DLod";
1352             } else {
1353                 sample_function->name = projected ? "texture3DProj" : "texture3D";
1354             }
1355             sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1356             break;
1357         default:
1358             sample_function->name = "";
1359             sample_function->coord_mask = 0;
1360             FIXME("Unrecognized sampler type: %#x;\n", sampler_type);
1361             break;
1362     }
1363 }
1364
1365 static void shader_glsl_append_fixup_arg(char *arguments, const char *reg_name,
1366         BOOL sign_fixup, enum fixup_channel_source channel_source)
1367 {
1368     switch(channel_source)
1369     {
1370         case CHANNEL_SOURCE_ZERO:
1371             strcat(arguments, "0.0");
1372             break;
1373
1374         case CHANNEL_SOURCE_ONE:
1375             strcat(arguments, "1.0");
1376             break;
1377
1378         case CHANNEL_SOURCE_X:
1379             strcat(arguments, reg_name);
1380             strcat(arguments, ".x");
1381             break;
1382
1383         case CHANNEL_SOURCE_Y:
1384             strcat(arguments, reg_name);
1385             strcat(arguments, ".y");
1386             break;
1387
1388         case CHANNEL_SOURCE_Z:
1389             strcat(arguments, reg_name);
1390             strcat(arguments, ".z");
1391             break;
1392
1393         case CHANNEL_SOURCE_W:
1394             strcat(arguments, reg_name);
1395             strcat(arguments, ".w");
1396             break;
1397
1398         default:
1399             FIXME("Unhandled channel source %#x\n", channel_source);
1400             strcat(arguments, "undefined");
1401             break;
1402     }
1403
1404     if (sign_fixup) strcat(arguments, " * 2.0 - 1.0");
1405 }
1406
/* Emit a GLSL statement that rewrites the destination register of the current
 * instruction in place, applying the given color fixup.
 *
 * Only components that (a) differ from the identity fixup, either through a
 * sign fixup or a non-identity channel source, and (b) are enabled in the
 * instruction's destination token are rewritten. YUV fixups are reported with
 * a FIXME and otherwise ignored. */
static void shader_glsl_color_correction(const struct SHADER_OPCODE_ARG *arg, struct color_fixup_desc fixup)
{
    glsl_dst_param_t dst;
    char fixup_args[256];
    unsigned int component_count, pending;
    DWORD fix_mask = 0;
    BOOL unused;

    /* Collect the components that need any kind of correction. */
    if (fixup.x_source != CHANNEL_SOURCE_X || fixup.x_sign_fixup) fix_mask |= WINED3DSP_WRITEMASK_0;
    if (fixup.y_source != CHANNEL_SOURCE_Y || fixup.y_sign_fixup) fix_mask |= WINED3DSP_WRITEMASK_1;
    if (fixup.z_source != CHANNEL_SOURCE_Z || fixup.z_sign_fixup) fix_mask |= WINED3DSP_WRITEMASK_2;
    if (fixup.w_source != CHANNEL_SOURCE_W || fixup.w_sign_fixup) fix_mask |= WINED3DSP_WRITEMASK_3;

    /* Restrict the correction to what the instruction actually writes. */
    fix_mask &= arg->dst;
    if (!fix_mask)
        return;

    if (is_yuv_fixup(fixup))
    {
        enum yuv_fixup yuv_fixup = get_yuv_fixup(fixup);
        FIXME("YUV fixup (%#x) not supported\n", yuv_fixup);
        return;
    }

    component_count = shader_glsl_get_write_mask_size(fix_mask);

    dst.mask_str[0] = '\0';
    shader_glsl_get_write_mask(fix_mask, dst.mask_str);

    dst.reg_name[0] = '\0';
    shader_glsl_get_register_name(arg->dst, arg->dst_addr, dst.reg_name, &unused, arg);

    /* Build the comma separated list of corrected components. "pending"
     * counts the components still to be appended, so that no separator is
     * written after the last one. */
    fixup_args[0] = '\0';
    pending = component_count;

    if (fix_mask & WINED3DSP_WRITEMASK_0)
    {
        shader_glsl_append_fixup_arg(fixup_args, dst.reg_name, fixup.x_sign_fixup, fixup.x_source);
        if (--pending)
            strcat(fixup_args, ", ");
    }

    if (fix_mask & WINED3DSP_WRITEMASK_1)
    {
        shader_glsl_append_fixup_arg(fixup_args, dst.reg_name, fixup.y_sign_fixup, fixup.y_source);
        if (--pending)
            strcat(fixup_args, ", ");
    }

    if (fix_mask & WINED3DSP_WRITEMASK_2)
    {
        shader_glsl_append_fixup_arg(fixup_args, dst.reg_name, fixup.z_sign_fixup, fixup.z_source);
        if (--pending)
            strcat(fixup_args, ", ");
    }

    if (fix_mask & WINED3DSP_WRITEMASK_3)
    {
        shader_glsl_append_fixup_arg(fixup_args, dst.reg_name, fixup.w_sign_fixup, fixup.w_source);
        if (--pending)
            strcat(fixup_args, ", ");
    }

    /* A single component is assigned directly, several need a vector constructor. */
    if (component_count <= 1)
    {
        shader_addline(arg->buffer, "%s%s = %s;\n", dst.reg_name, dst.mask_str, fixup_args);
        return;
    }

    shader_addline(arg->buffer, "%s%s = vec%u(%s);\n",
            dst.reg_name, dst.mask_str, component_count, fixup_args);
}
1472
/* Emit a texture sampling expression for the current instruction.
 *
 * The generated code has the form
 *   <dst> <sample function>(<P|V>sampler<N>, <coordinates>[, <bias>])<swizzle>);
 * where the coordinate expression is produced from the printf-style
 * coord_reg_fmt and the variadic arguments. For pixel shaders the color fixup
 * recorded for the sampler in the current compile arguments is applied
 * afterwards; vertex shaders always use the identity fixup. */
static void PRINTF_ATTR(6, 7) shader_glsl_gen_sample_code(const SHADER_OPCODE_ARG *arg,
        DWORD sampler, const glsl_sample_function_t *sample_function, DWORD swizzle,
        const char *bias, const char *coord_reg_fmt, ...)
{
    SHADER_BUFFER *out = arg->buffer;
    struct color_fixup_desc sampler_fixup;
    const char *sampler_prefix;
    char result_swizzle[6];
    va_list coord_args;

    shader_glsl_get_swizzle(swizzle, FALSE, arg->dst, result_swizzle);

    if (!shader_is_pshader_version(arg->reg_maps->shader_version))
    {
        sampler_prefix = "Vsampler";
        sampler_fixup = COLOR_FIXUP_IDENTITY; /* FIXME: Vshader color fixup */
    }
    else
    {
        IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *)arg->shader;

        sampler_fixup = ps->cur_args->color_fixup[sampler];
        sampler_prefix = "Psampler";
    }

    shader_glsl_append_dst(out, arg);

    shader_addline(out, "%s(%s%u, ", sample_function->name, sampler_prefix, sampler);

    /* Texture coordinates, formatted by the caller. */
    va_start(coord_args, coord_reg_fmt);
    shader_vaddline(out, coord_reg_fmt, coord_args);
    va_end(coord_args);

    /* Optional bias / lod argument, followed by the result swizzle. */
    if (!bias)
        shader_addline(out, ")%s);\n", result_swizzle);
    else
        shader_addline(out, ", %s)%s);\n", bias, result_swizzle);

    if (is_identity_fixup(sampler_fixup))
        return;

    shader_glsl_color_correction(arg, sampler_fixup);
}
1511
1512 /*****************************************************************************
1513  * 
1514  * Begin processing individual instruction opcodes
1515  * 
1516  ****************************************************************************/
1517
/* Generate code for the binary arithmetic opcodes MUL, ADD and SUB
 * (dst = src0 <op> src1). Any other opcode is reported with a FIXME and a
 * blank is emitted in place of the operator. */
static void shader_glsl_arith(const SHADER_OPCODE_ARG *arg)
{
    const SHADER_OPCODE *ins = arg->opcode;
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t lhs, rhs;
    DWORD dst_mask;
    char glsl_op;

    /* Pick the GLSL operator matching the D3D opcode. */
    if (ins->opcode == WINED3DSIO_MUL)
    {
        glsl_op = '*';
    }
    else if (ins->opcode == WINED3DSIO_ADD)
    {
        glsl_op = '+';
    }
    else if (ins->opcode == WINED3DSIO_SUB)
    {
        glsl_op = '-';
    }
    else
    {
        glsl_op = ' ';
        FIXME("Opcode %s not yet handled in GLSL\n", ins->name);
    }

    /* Both sources are swizzled according to the destination write mask. */
    dst_mask = shader_glsl_append_dst(out, arg);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], dst_mask, &lhs);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], dst_mask, &rhs);
    shader_addline(out, "%s %c %s);\n", lhs.param_str, glsl_op, rhs.param_str);
}
1544
/* Process the WINED3DSIO_MOV and WINED3DSIO_MOVA opcodes in GLSL (dst = src).
 *
 * Three flavours are generated:
 *  - a version 1 vertex shader MOV to the address register floors the source
 *    and converts it to an integer,
 *  - MOVA rounds the source to the nearest integer,
 *  - everything else is a plain copy. */
static void shader_glsl_mov(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t src;
    unsigned int components;
    DWORD dst_mask;
    BOOL vs1_addr_write;

    dst_mask = shader_glsl_append_dst(out, arg);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], dst_mask, &src);

    /* In vs_1_1 WINED3DSIO_MOV can write to the address register. In later
     * shader versions WINED3DSIO_MOVA is used for this. */
    vs1_addr_write = WINED3DSHADER_VERSION_MAJOR(arg->reg_maps->shader_version) == 1
            && !shader_is_pshader_version(arg->reg_maps->shader_version)
            && shader_get_regtype(arg->dst) == WINED3DSPR_ADDR;

    if (!vs1_addr_write && arg->opcode->opcode != WINED3DSIO_MOVA)
    {
        shader_addline(out, "%s);\n", src.param_str);
        return;
    }

    components = shader_glsl_get_write_mask_size(dst_mask);

    if (vs1_addr_write)
    {
        /* This is a simple floor() */
        if (components > 1)
            shader_addline(out, "ivec%d(floor(%s)));\n", components, src.param_str);
        else
            shader_addline(out, "int(floor(%s)));\n", src.param_str);
        return;
    }

    /* MOVA: we need to *round* to the nearest int here. */
    if (components > 1)
        shader_addline(out, "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s)));\n",
                components, src.param_str, components, src.param_str);
    else
        shader_addline(out, "int(floor(abs(%s) + 0.5) * sign(%s)));\n", src.param_str, src.param_str);
}
1580
/* Process the dot product opcodes DP3 and DP4 in GLSL (dst = dot(src0, src1)).
 * The scalar result is replicated with a vecN() constructor when more than
 * one destination component is written. */
static void shader_glsl_dot(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t lhs, rhs;
    unsigned int dst_components;
    DWORD dst_mask, operand_mask;

    dst_mask = shader_glsl_append_dst(out, arg);
    dst_components = shader_glsl_get_write_mask_size(dst_mask);

    /* dp4 consumes all four source components, everything else only xyz. */
    operand_mask = (arg->opcode->opcode == WINED3DSIO_DP4)
            ? WINED3DSP_WRITEMASK_ALL
            : (WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2);

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], operand_mask, &lhs);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], operand_mask, &rhs);

    if (dst_components <= 1)
    {
        shader_addline(out, "dot(%s, %s));\n", lhs.param_str, rhs.param_str);
        return;
    }

    shader_addline(out, "vec%d(dot(%s, %s)));\n", dst_components, lhs.param_str, rhs.param_str);
}
1610
/* Process the cross product opcode in GLSL (dst = cross(src0, src1)).
 *
 * Note that this instruction has some restrictions. The destination write mask
 * can't contain the w component, and the source swizzles have to be .xyzw.
 * Both sources are read as xyz vectors and the result is swizzled with the
 * destination write mask. */
static void shader_glsl_cross(const SHADER_OPCODE_ARG *arg)
{
    const DWORD xyz_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t lhs, rhs;
    char result_mask[6];

    shader_glsl_get_write_mask(arg->dst, result_mask);
    shader_glsl_append_dst(out, arg);

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], xyz_mask, &lhs);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], xyz_mask, &rhs);

    shader_addline(out, "cross(%s, %s)%s);\n", lhs.param_str, rhs.param_str, result_mask);
}
1626
/* Process the WINED3DSIO_POW instruction in GLSL (dst = |src0|^src1).
 * Both sources are scalars. D3D raises the absolute value of src0 to the
 * power, while GLSL's pow() uses its argument as-is, hence the explicit abs().
 * The result is replicated when several destination components are written. */
static void shader_glsl_pow(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t base, exponent;
    unsigned int dst_components;
    DWORD dst_mask;

    dst_mask = shader_glsl_append_dst(out, arg);
    dst_components = shader_glsl_get_write_mask_size(dst_mask);

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &base);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0, &exponent);

    if (dst_components <= 1)
    {
        shader_addline(out, "pow(abs(%s), %s));\n", base.param_str, exponent.param_str);
        return;
    }

    shader_addline(out, "vec%d(pow(abs(%s), %s)));\n", dst_components, base.param_str, exponent.param_str);
}
1650
/* Process the WINED3DSIO_LOG instruction in GLSL (dst = log2(|src0|)).
 * The source is a scalar. D3D takes the logarithm of the absolute value,
 * while GLSL's log2() uses its argument as-is, hence the explicit abs().
 * The result is replicated when several destination components are written. */
static void shader_glsl_log(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t operand;
    unsigned int dst_components;
    DWORD dst_mask;

    dst_mask = shader_glsl_append_dst(out, arg);
    dst_components = shader_glsl_get_write_mask_size(dst_mask);

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &operand);

    if (dst_components <= 1)
    {
        shader_addline(out, "log2(abs(%s)));\n", operand.param_str);
        return;
    }

    shader_addline(out, "vec%d(log2(abs(%s))));\n", dst_components, operand.param_str);
}
1672
/* Map the opcode 1-to-1 to a GLSL function (dst = function(src0, src1, ...)).
 *
 * The opcode is looked up in a small translation table; unknown opcodes are
 * reported with a FIXME and an empty function name is emitted. All sources are
 * swizzled according to the destination write mask. */
static void shader_glsl_map2gl(const SHADER_OPCODE_ARG *arg)
{
    static const struct
    {
        DWORD opcode;
        const char *glsl_name;
    }
    glsl_functions[] =
    {
        {WINED3DSIO_MIN, "min"},
        {WINED3DSIO_MAX, "max"},
        {WINED3DSIO_ABS, "abs"},
        {WINED3DSIO_FRC, "fract"},
        {WINED3DSIO_NRM, "normalize"},
        {WINED3DSIO_EXP, "exp2"},
        {WINED3DSIO_SGN, "sign"},
        {WINED3DSIO_DSX, "dFdx"},
        {WINED3DSIO_DSY, "ycorrection.y * dFdy"},
    };
    const SHADER_OPCODE *ins = arg->opcode;
    SHADER_BUFFER *out = arg->buffer;
    const char *glsl_name = NULL;
    glsl_src_param_t operand;
    DWORD dst_mask;
    unsigned int idx;

    for (idx = 0; idx < sizeof(glsl_functions) / sizeof(glsl_functions[0]); ++idx)
    {
        if (glsl_functions[idx].opcode == ins->opcode)
        {
            glsl_name = glsl_functions[idx].glsl_name;
            break;
        }
    }

    if (!glsl_name)
    {
        glsl_name = "";
        FIXME("Opcode %s not yet handled in GLSL\n", ins->name);
    }

    dst_mask = shader_glsl_append_dst(out, arg);

    shader_addline(out, "%s(", glsl_name);

    if (ins->num_params > 0)
    {
        /* First source, without a leading separator. */
        shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], dst_mask, &operand);
        shader_addline(out, "%s", operand.param_str);

        /* Remaining sources. num_params also counts the destination, so
         * source idx exists only while idx + 1 < num_params. */
        for (idx = 1; idx + 1 < ins->num_params; ++idx)
        {
            shader_glsl_add_src_param(arg, arg->src[idx], arg->src_addr[idx], dst_mask, &operand);
            shader_addline(out, ", %s", operand.param_str);
        }
    }

    shader_addline(out, "));\n");
}
1715
/** Process the WINED3DSIO_EXPP instruction in GLSL.
 *
 * Shader versions below 2.0 compute a full vector in the temporary tmp0 and
 * then copy the components selected by the write mask to the destination:
 *   tmp0.x = 2^(floor(src))
 *   tmp0.y = src - floor(src)
 *   tmp0.z = 2^src   (partial precision is allowed, but optional)
 *   tmp0.w = 1.0
 * Later versions simply compute
 *   dst = 2^src      (partial precision is allowed, but optional)
 * replicating the scalar result over the written components.
 */
static void shader_glsl_expp(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t operand;
    unsigned int dst_components;
    DWORD dst_mask;
    char result_mask[6];

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &operand);

    if (!(arg->reg_maps->shader_version < WINED3DPS_VERSION(2,0)))
    {
        dst_mask = shader_glsl_append_dst(out, arg);
        dst_components = shader_glsl_get_write_mask_size(dst_mask);

        if (dst_components > 1)
            shader_addline(out, "vec%d(exp2(%s)));\n", dst_components, operand.param_str);
        else
            shader_addline(out, "exp2(%s));\n", operand.param_str);
        return;
    }

    /* Shader model 1.x: build the complete result in tmp0 first. */
    shader_addline(out, "tmp0.x = exp2(floor(%s));\n", operand.param_str);
    shader_addline(out, "tmp0.y = %s - floor(%s);\n", operand.param_str, operand.param_str);
    shader_addline(out, "tmp0.z = exp2(%s);\n", operand.param_str);
    shader_addline(out, "tmp0.w = 1.0;\n");

    /* Then move the requested components to the real destination. */
    shader_glsl_append_dst(out, arg);
    shader_glsl_get_write_mask(arg->dst, result_mask);
    shader_addline(out, "tmp0%s);\n", result_mask);
}
1757
/** Process the RCP (reciprocal or inverse) opcode in GLSL (dst = 1 / src).
 * The source is read as a scalar through the w position of its swizzle, and
 * the result is replicated when several destination components are written. */
static void shader_glsl_rcp(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t operand;
    unsigned int dst_components;
    DWORD dst_mask;

    dst_mask = shader_glsl_append_dst(out, arg);
    dst_components = shader_glsl_get_write_mask_size(dst_mask);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &operand);

    if (dst_components <= 1)
    {
        shader_addline(out, "1.0 / %s);\n", operand.param_str);
        return;
    }

    shader_addline(out, "vec%d(1.0 / %s));\n", dst_components, operand.param_str);
}
1775
/** Process the RSQ (reciprocal square root) opcode in GLSL
 * (dst = inversesqrt(src)). The source is read as a scalar through the w
 * position of its swizzle, and the result is replicated when several
 * destination components are written. */
static void shader_glsl_rsq(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t operand;
    unsigned int dst_components;
    DWORD dst_mask;

    dst_mask = shader_glsl_append_dst(out, arg);
    dst_components = shader_glsl_get_write_mask_size(dst_mask);

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &operand);

    if (dst_components <= 1)
    {
        shader_addline(out, "inversesqrt(%s));\n", operand.param_str);
        return;
    }

    shader_addline(out, "vec%d(inversesqrt(%s)));\n", dst_components, operand.param_str);
}
1794
/** Process the signed comparison opcodes SLT and SGE in GLSL.
 *
 * Vector destinations use the component-wise lessThan() / greaterThanEqual()
 * built-ins wrapped in a vecN() constructor. Scalar destinations are handled
 * separately, see below. Other opcodes are reported with a FIXME. */
static void shader_glsl_compare(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    DWORD opcode = arg->opcode->opcode;
    glsl_src_param_t lhs, rhs;
    unsigned int dst_components;
    DWORD dst_mask;

    dst_mask = shader_glsl_append_dst(out, arg);
    dst_components = shader_glsl_get_write_mask_size(dst_mask);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], dst_mask, &lhs);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], dst_mask, &rhs);

    if (dst_components > 1)
    {
        const char *vec_compare = "";

        if (opcode == WINED3DSIO_SLT)
            vec_compare = "lessThan";
        else if (opcode == WINED3DSIO_SGE)
            vec_compare = "greaterThanEqual";
        else
            FIXME("Can't handle opcode %s\n", arg->opcode->name);

        shader_addline(out, "vec%d(%s(%s, %s)));\n", dst_components, vec_compare,
                lhs.param_str, rhs.param_str);
        return;
    }

    if (opcode == WINED3DSIO_SLT)
    {
        /* step(src0, src1) is not suitable here: if src0 == src1 SLT is
         * supposed to return 0.0, but step() returns 1.0 in that case.
         * step(src1 * -1.0, src0 * -1.0) suffers from the same issue, and
         * not() does not accept a scalar. A bvec compare padded with an
         * unused second component would be an alternative; a conditional is
         * used instead. */
        shader_addline(out, "(%s < %s) ? 1.0 : 0.0);\n", lhs.param_str, rhs.param_str);
    }
    else if (opcode == WINED3DSIO_SGE)
    {
        /* Here step() gives the right result and saves a conditional. */
        shader_addline(out, "step(%s, %s));\n", rhs.param_str, lhs.param_str);
    }
    else
    {
        FIXME("Can't handle opcode %s\n", arg->opcode->name);
    }
}
1842
/** Process the CMP instruction in GLSL (dst = src0 >= 0.0 ? src1 : src2), per channel.
 *
 * When src0 is a scalar a single conditional covers the whole destination.
 * Otherwise one statement is generated for each src0 channel, covering the
 * destination components whose swizzle selects that channel. */
static void shader_glsl_cmp(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t cond, if_ge, if_lt;
    DWORD dst_mask, masked_dst, cond_channel = 0;
    DWORD dst_reg, dst_regtype;
    unsigned int src_chan, dst_chan;
    char tmp_mask[6];
    BOOL dst_is_source, used_tmp = FALSE;

    if (shader_is_scalar(arg->src[0]))
    {
        dst_mask = shader_glsl_append_dst(out, arg);

        shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_ALL, &cond);
        shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], dst_mask, &if_ge);
        shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], dst_mask, &if_lt);

        shader_addline(out, "%s >= 0.0 ? %s : %s);\n",
                cond.param_str, if_ge.param_str, if_lt.param_str);
        return;
    }

    /* Splitting the cmp instruction up in multiple lines imposes a problem:
     * The first lines may overwrite source parameters of the following lines.
     * Deal with that by using a temporary destination register if any source
     * register is also the destination register. */
    dst_reg = arg->dst & WINED3DSP_REGNUM_MASK;
    dst_regtype = shader_get_regtype(arg->dst);
    dst_is_source =
            ((arg->src[0] & WINED3DSP_REGNUM_MASK) == dst_reg && shader_get_regtype(arg->src[0]) == dst_regtype)
            || ((arg->src[1] & WINED3DSP_REGNUM_MASK) == dst_reg && shader_get_regtype(arg->src[1]) == dst_regtype)
            || ((arg->src[2] & WINED3DSP_REGNUM_MASK) == dst_reg && shader_get_regtype(arg->src[2]) == dst_regtype);

    /* Cycle through all source0 channels */
    for (src_chan = 0; src_chan < 4; ++src_chan)
    {
        /* Find the destination channels which use the current source0
         * channel. cond_channel deliberately keeps its previous value when no
         * destination channel matches. */
        dst_mask = 0;
        for (dst_chan = 0; dst_chan < 4; ++dst_chan)
        {
            if (((arg->src[0] >> (WINED3DSP_SWIZZLE_SHIFT + 2 * dst_chan)) & 0x3) != src_chan)
                continue;

            cond_channel = WINED3DSP_WRITEMASK_0 << dst_chan;
            dst_mask |= cond_channel;
        }

        masked_dst = arg->dst & (~WINED3DSP_SWIZZLE_MASK | dst_mask);

        if (dst_is_source)
            dst_mask = shader_glsl_get_write_mask(masked_dst, tmp_mask);
        else
            dst_mask = shader_glsl_append_dst_ext(out, arg, masked_dst);

        /* Nothing of this channel group ends up in the destination. */
        if (!dst_mask)
            continue;

        if (dst_is_source)
        {
            shader_addline(out, "tmp0%s = (", tmp_mask);
            used_tmp = TRUE;
        }

        shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], cond_channel, &cond);
        shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], dst_mask, &if_ge);
        shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], dst_mask, &if_lt);

        shader_addline(out, "%s >= 0.0 ? %s : %s);\n",
                cond.param_str, if_ge.param_str, if_lt.param_str);
    }

    if (!used_tmp)
        return;

    /* Copy the result from the temporary to the real destination. */
    shader_glsl_get_write_mask(arg->dst, tmp_mask);
    shader_glsl_append_dst_ext(out, arg, arg->dst);
    shader_addline(out, "tmp0%s);\n", tmp_mask);
}
1917
/** Process the CND opcode in GLSL (dst = (src0 > 0.5) ? src1 : src2).
 *
 * For ps 1.1-1.3, only a single component of src0 is used. For ps 1.4
 * the compare is done per component of src0, which is implemented by emitting
 * one statement for each src0 channel. */
static void shader_glsl_cnd(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t cond, if_gt, if_le;
    DWORD dst_mask, cond_channel = 0;
    unsigned int src_chan, dst_chan;

    if (arg->reg_maps->shader_version < WINED3DPS_VERSION(1, 4))
    {
        dst_mask = shader_glsl_append_dst(out, arg);
        shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &cond);
        shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], dst_mask, &if_gt);
        shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], dst_mask, &if_le);

        /* Fun: The D3DSI_COISSUE flag changes the semantic of the cnd instruction for < 1.4 shaders */
        if (!(arg->opcode_token & WINED3DSI_COISSUE))
            shader_addline(out, "%s > 0.5 ? %s : %s);\n",
                    cond.param_str, if_gt.param_str, if_le.param_str);
        else
            shader_addline(out, "%s /* COISSUE! */);\n", if_gt.param_str);
        return;
    }

    /* Cycle through all source0 channels */
    for (src_chan = 0; src_chan < 4; ++src_chan)
    {
        /* Find the destination channels which use the current source0
         * channel. cond_channel deliberately keeps its previous value when no
         * destination channel matches. */
        dst_mask = 0;
        for (dst_chan = 0; dst_chan < 4; ++dst_chan)
        {
            if (((arg->src[0] >> (WINED3DSP_SWIZZLE_SHIFT + 2 * dst_chan)) & 0x3) != src_chan)
                continue;

            cond_channel = WINED3DSP_WRITEMASK_0 << dst_chan;
            dst_mask |= cond_channel;
        }

        dst_mask = shader_glsl_append_dst_ext(out, arg, arg->dst & (~WINED3DSP_SWIZZLE_MASK | dst_mask));
        if (!dst_mask)
            continue;

        shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], cond_channel, &cond);
        shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], dst_mask, &if_gt);
        shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], dst_mask, &if_le);

        shader_addline(out, "%s > 0.5 ? %s : %s);\n",
                cond.param_str, if_gt.param_str, if_le.param_str);
    }
}
1966
/** GLSL code generation for WINED3DSIO_MAD: multiply the first two sources,
 * then add the third (dst = (src0 * src1) + src2). All sources are swizzled
 * according to the destination write mask. */
static void shader_glsl_mad(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t factor0, factor1, addend;
    DWORD dst_mask;

    dst_mask = shader_glsl_append_dst(out, arg);

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], dst_mask, &factor0);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], dst_mask, &factor1);
    shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], dst_mask, &addend);

    shader_addline(out, "(%s * %s) + %s);\n",
            factor0.param_str, factor1.param_str, addend.param_str);
}
1982
/** Handles transforming all WINED3DSIO_M?x? opcodes to GLSL code.
 *
 * Each matrix multiplication is expanded into one dot product per result
 * component: DP4 for the M4xN opcodes and DP3 for the M3xN opcodes. Result
 * component i is the dot product of src0 with the source token src1 + i
 * (presumably the i-th consecutive matrix row register). Unknown opcodes
 * generate nothing. */
static void shader_glsl_mnxn(const SHADER_OPCODE_ARG *arg)
{
    IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)arg->shader;
    SHADER_OPCODE_ARG dot_arg;
    DWORD dot_opcode, dst_base;
    int rows, row;

    /* Number of result components and the dot product flavour to use. */
    switch (arg->opcode->opcode)
    {
        case WINED3DSIO_M4x4: rows = 4; dot_opcode = WINED3DSIO_DP4; break;
        case WINED3DSIO_M4x3: rows = 3; dot_opcode = WINED3DSIO_DP4; break;
        case WINED3DSIO_M3x4: rows = 4; dot_opcode = WINED3DSIO_DP3; break;
        case WINED3DSIO_M3x3: rows = 3; dot_opcode = WINED3DSIO_DP3; break;
        case WINED3DSIO_M3x2: rows = 2; dot_opcode = WINED3DSIO_DP3; break;
        default:
            return;
    }

    /* Set up the parts of the temporary instruction shared by all rows. */
    memset(&dot_arg, 0, sizeof(SHADER_OPCODE_ARG));
    dot_arg.shader      = arg->shader;
    dot_arg.buffer      = arg->buffer;
    dot_arg.src[0]      = arg->src[0];
    dot_arg.src_addr[0] = arg->src_addr[0];
    dot_arg.src_addr[1] = arg->src_addr[1];
    dot_arg.reg_maps    = arg->reg_maps;
    dot_arg.opcode      = shader_get_opcode(shader->baseShader.shader_ins,
            arg->reg_maps->shader_version, dot_opcode);

    /* One dot product per result component, each writing a single channel. */
    dst_base = arg->dst & ~WINED3DSP_WRITEMASK_ALL;
    for (row = 0; row < rows; ++row)
    {
        dot_arg.dst = dst_base | (WINED3DSP_WRITEMASK_0 << row);
        dot_arg.src[1] = arg->src[1] + row;
        shader_glsl_dot(&dot_arg);
    }
}
2035
/**
    The LRP instruction performs a component-wise linear interpolation
    between the second and third operands, using the first operand as the
    blend factor:  dst = src2 + src0 * (src1 - src2)
    In GLSL this is mix(src2, src1, src0).
*/
static void shader_glsl_lrp(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t blend, at_one, at_zero;
    DWORD dst_mask;

    dst_mask = shader_glsl_append_dst(out, arg);

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], dst_mask, &blend);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], dst_mask, &at_one);
    shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], dst_mask, &at_zero);

    /* Note the reversed operand order compared to the D3D instruction. */
    shader_addline(out, "mix(%s, %s, %s));\n",
            at_zero.param_str, at_one.param_str, blend.param_str);
}
2058
2059 /** Process the WINED3DSIO_LIT instruction in GLSL:
2060  * dst.x = dst.w = 1.0
2061  * dst.y = (src0.x > 0) ? src0.x
2062  * dst.z = (src0.x > 0) ? ((src0.y > 0) ? pow(src0.y, src.w) : 0) : 0
2063  *                                        where src.w is clamped at +- 128
2064  */
2065 static void shader_glsl_lit(const SHADER_OPCODE_ARG *arg)
2066 {
2067     glsl_src_param_t src0_param;
2068     glsl_src_param_t src1_param;
2069     glsl_src_param_t src3_param;
2070     char dst_mask[6];
2071
2072     shader_glsl_append_dst(arg->buffer, arg);
2073     shader_glsl_get_write_mask(arg->dst, dst_mask);
2074
2075     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
2076     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_1, &src1_param);
2077     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &src3_param);
2078
2079     /* The sdk specifies the instruction like this
2080      * dst.x = 1.0;
2081      * if(src.x > 0.0) dst.y = src.x
2082      * else dst.y = 0.0.
2083      * if(src.x > 0.0 && src.y > 0.0) dst.z = pow(src.y, power);
2084      * else dst.z = 0.0;
2085      * dst.w = 1.0;
2086      *
2087      * Obviously that has quite a few conditionals in it which we don't like. So the first step is this:
2088      * dst.x = 1.0                                  ... No further explanation needed
2089      * dst.y = max(src.y, 0.0);                     ... If x < 0.0, use 0.0, otherwise x. Same as the conditional
2090      * dst.z = x > 0.0 ? pow(max(y, 0.0), p) : 0;   ... 0 ^ power is 0, and otherwise we use y anyway
2091      * dst.w = 1.0.                                 ... Nothing fancy.
2092      *
2093      * So we still have one conditional in there. So do this:
2094      * dst.z = pow(max(0.0, src.y) * step(0.0, src.x), power);
2095      *
2096      * step(0.0, x) will return 1 if src.x > 0.0, and 0 otherwise. So if y is 0 we get pow(0.0 * 1.0, power),
2097      * which sets dst.z to 0. If y > 0, but x = 0.0, we get pow(y * 0.0, power), which results in 0 too.
2098      * if both x and y are > 0, we get pow(y * 1.0, power), as it is supposed to
2099      */
2100     shader_addline(arg->buffer, "vec4(1.0, max(%s, 0.0), pow(max(0.0, %s) * step(0.0, %s), clamp(%s, -128.0, 128.0)), 1.0)%s);\n",
2101                    src0_param.param_str, src1_param.param_str, src0_param.param_str, src3_param.param_str, dst_mask);
2102 }
2103
/** Process the WINED3DSIO_DST instruction in GLSL:
 * dst.x = 1.0
 * dst.y = src0.y * src1.y
 * dst.z = src0.z
 * dst.w = src1.w
 * The full vec4 is built and then swizzled with the destination write mask.
 */
static void shader_glsl_dst(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t first_y, first_z, second_y, second_w;
    char result_mask[6];

    shader_glsl_append_dst(out, arg);
    shader_glsl_get_write_mask(arg->dst, result_mask);

    /* Fetch the individual source components used by the result. */
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_1, &first_y);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_2, &first_z);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_1, &second_y);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_3, &second_w);

    shader_addline(out, "vec4(1.0, %s * %s, %s, %s))%s;\n",
            first_y.param_str, second_y.param_str, first_z.param_str, second_w.param_str, result_mask);
}
2129
/** Process the WINED3DSIO_SINCOS instruction in GLSL:
 * VS 2.0 requires that specific cosine and sine constants be passed to this
 * instruction so the hardware can handle it. cos() and sin() are built-in
 * for GLSL, so only the first source is used here.
 *
 * dst.x = cos(src0.?)
 * dst.y = sin(src0.?)
 * dst.z and dst.w are not written.
 *
 * Only the write masks .x, .y and .xy are handled; anything else is reported
 * with an ERR and no expression is emitted.
 */
static void shader_glsl_sincos(const SHADER_OPCODE_ARG *arg)
{
    SHADER_BUFFER *out = arg->buffer;
    glsl_src_param_t angle;
    DWORD dst_mask;

    dst_mask = shader_glsl_append_dst(out, arg);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &angle);

    if (dst_mask == WINED3DSP_WRITEMASK_0)
    {
        shader_addline(out, "cos(%s));\n", angle.param_str);
    }
    else if (dst_mask == WINED3DSP_WRITEMASK_1)
    {
        shader_addline(out, "sin(%s));\n", angle.param_str);
    }
    else if (dst_mask == (WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1))
    {
        shader_addline(out, "vec2(cos(%s), sin(%s)));\n", angle.param_str, angle.param_str);
    }
    else
    {
        ERR("Write mask should be .x, .y or .xy\n");
    }
}
2165
2166 /** Process the WINED3DSIO_LOOP instruction in GLSL:
2167  * Start a for() loop where src1.y is the initial value of aL,
2168  *  increment aL by src1.z for a total of src1.x iterations.
2169  *  Need to use a temporary variable for this operation.
2170  */
2171 /* FIXME: I don't think nested loops will work correctly this way. */
2172 static void shader_glsl_loop(const SHADER_OPCODE_ARG *arg)
2173 {
2174     glsl_src_param_t src1_param;
2175     IWineD3DBaseShaderImpl* shader = (IWineD3DBaseShaderImpl*) arg->shader;
2176     DWORD regtype = shader_get_regtype(arg->src[1]);
2177     DWORD reg = arg->src[1] & WINED3DSP_REGNUM_MASK;
2178     const DWORD *control_values = NULL;
2179     const local_constant *constant;
2180
2181     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_ALL, &src1_param);
2182
2183     /* Try to hardcode the loop control parameters if possible. Direct3D 9 class hardware doesn't support real
2184      * varying indexing, but Microsoft designed this feature for Shader model 2.x+. If the loop control is
2185      * known at compile time, the GLSL compiler can unroll the loop, and replace indirect addressing with direct
2186      * addressing.
2187      */
2188     if(regtype == WINED3DSPR_CONSTINT) {
2189         LIST_FOR_EACH_ENTRY(constant, &shader->baseShader.constantsI, local_constant, entry) {
2190             if(constant->idx == reg) {
2191                 control_values = constant->value;
2192                 break;
2193             }
2194         }
2195     }
2196
2197     if(control_values) {
2198         if(control_values[2] > 0) {
2199             shader_addline(arg->buffer, "for (aL%u = %d; aL%u < (%d * %d + %d); aL%u += %d) {\n",
2200                            shader->baseShader.cur_loop_depth, control_values[1],
2201                            shader->baseShader.cur_loop_depth, control_values[0], control_values[2], control_values[1],
2202                            shader->baseShader.cur_loop_depth, control_values[2]);
2203         } else if(control_values[2] == 0) {
2204             shader_addline(arg->buffer, "for (aL%u = %d, tmpInt%u = 0; tmpInt%u < %d; tmpInt%u++) {\n",
2205                            shader->baseShader.cur_loop_depth, control_values[1], shader->baseShader.cur_loop_depth,
2206                            shader->baseShader.cur_loop_depth, control_values[0],
2207                            shader->baseShader.cur_loop_depth);
2208         } else {
2209             shader_addline(arg->buffer, "for (aL%u = %d; aL%u > (%d * %d + %d); aL%u += %d) {\n",
2210                            shader->baseShader.cur_loop_depth, control_values[1],
2211                            shader->baseShader.cur_loop_depth, control_values[0], control_values[2], control_values[1],
2212                            shader->baseShader.cur_loop_depth, control_values[2]);
2213         }
2214     } else {
2215         shader_addline(arg->buffer, "for (tmpInt%u = 0, aL%u = %s.y; tmpInt%u < %s.x; tmpInt%u++, aL%u += %s.z) {\n",
2216                        shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_regno,
2217                        src1_param.reg_name, shader->baseShader.cur_loop_depth, src1_param.reg_name,
2218                        shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_regno, src1_param.reg_name);
2219     }
2220
2221     shader->baseShader.cur_loop_depth++;
2222     shader->baseShader.cur_loop_regno++;
2223 }
2224
2225 static void shader_glsl_end(const SHADER_OPCODE_ARG *arg)
2226 {
2227     IWineD3DBaseShaderImpl* shader = (IWineD3DBaseShaderImpl*) arg->shader;
2228
2229     shader_addline(arg->buffer, "}\n");
2230
2231     if(arg->opcode->opcode == WINED3DSIO_ENDLOOP) {
2232         shader->baseShader.cur_loop_depth--;
2233         shader->baseShader.cur_loop_regno--;
2234     }
2235     if(arg->opcode->opcode == WINED3DSIO_ENDREP) {
2236         shader->baseShader.cur_loop_depth--;
2237     }
2238 }
2239
2240 static void shader_glsl_rep(const SHADER_OPCODE_ARG *arg)
2241 {
2242     IWineD3DBaseShaderImpl* shader = (IWineD3DBaseShaderImpl*) arg->shader;
2243     glsl_src_param_t src0_param;
2244
2245     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
2246     shader_addline(arg->buffer, "for (tmpInt%d = 0; tmpInt%d < %s; tmpInt%d++) {\n",
2247                    shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_depth,
2248                    src0_param.param_str, shader->baseShader.cur_loop_depth);
2249     shader->baseShader.cur_loop_depth++;
2250 }
2251
2252 static void shader_glsl_if(const SHADER_OPCODE_ARG *arg)
2253 {
2254     glsl_src_param_t src0_param;
2255
2256     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
2257     shader_addline(arg->buffer, "if (%s) {\n", src0_param.param_str);
2258 }
2259
2260 static void shader_glsl_ifc(const SHADER_OPCODE_ARG *arg)
2261 {
2262     glsl_src_param_t src0_param;
2263     glsl_src_param_t src1_param;
2264
2265     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
2266     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0, &src1_param);
2267
2268     shader_addline(arg->buffer, "if (%s %s %s) {\n",
2269             src0_param.param_str, shader_get_comp_op(arg->opcode_token), src1_param.param_str);
2270 }
2271
2272 static void shader_glsl_else(const SHADER_OPCODE_ARG *arg)
2273 {
2274     shader_addline(arg->buffer, "} else {\n");
2275 }
2276
2277 static void shader_glsl_break(const SHADER_OPCODE_ARG *arg)
2278 {
2279     shader_addline(arg->buffer, "break;\n");
2280 }
2281
2282 /* FIXME: According to MSDN the compare is done per component. */
2283 static void shader_glsl_breakc(const SHADER_OPCODE_ARG *arg)
2284 {
2285     glsl_src_param_t src0_param;
2286     glsl_src_param_t src1_param;
2287
2288     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
2289     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0, &src1_param);
2290
2291     shader_addline(arg->buffer, "if (%s %s %s) break;\n",
2292             src0_param.param_str, shader_get_comp_op(arg->opcode_token), src1_param.param_str);
2293 }
2294
2295 static void shader_glsl_label(const SHADER_OPCODE_ARG *arg)
2296 {
2297
2298     DWORD snum = (arg->src[0]) & WINED3DSP_REGNUM_MASK;
2299     shader_addline(arg->buffer, "}\n");
2300     shader_addline(arg->buffer, "void subroutine%u () {\n",  snum);
2301 }
2302
2303 static void shader_glsl_call(const SHADER_OPCODE_ARG *arg)
2304 {
2305     DWORD snum = (arg->src[0]) & WINED3DSP_REGNUM_MASK;
2306     shader_addline(arg->buffer, "subroutine%u();\n", snum);
2307 }
2308
2309 static void shader_glsl_callnz(const SHADER_OPCODE_ARG *arg)
2310 {
2311     glsl_src_param_t src1_param;
2312
2313     DWORD snum = (arg->src[0]) & WINED3DSP_REGNUM_MASK;
2314     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0, &src1_param);
2315     shader_addline(arg->buffer, "if (%s) subroutine%u();\n", src1_param.param_str, snum);
2316 }
2317
2318 /*********************************************
2319  * Pixel Shader Specific Code begins here
2320  ********************************************/
2321 static void pshader_glsl_tex(const SHADER_OPCODE_ARG *arg)
2322 {
2323     IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
2324     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
2325     DWORD shader_version = arg->reg_maps->shader_version;
2326     glsl_sample_function_t sample_function;
2327     DWORD sample_flags = 0;
2328     DWORD sampler_type;
2329     DWORD sampler_idx;
2330     DWORD mask = 0, swizzle;
2331
2332     /* 1.0-1.4: Use destination register as sampler source.
2333      * 2.0+: Use provided sampler source. */
2334     if (shader_version < WINED3DPS_VERSION(2,0)) sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2335     else sampler_idx = arg->src[1] & WINED3DSP_REGNUM_MASK;
2336     sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
2337
2338     if (shader_version < WINED3DPS_VERSION(1,4))
2339     {
2340         DWORD flags = deviceImpl->stateBlock->textureState[sampler_idx][WINED3DTSS_TEXTURETRANSFORMFLAGS];
2341
2342         /* Projected cube textures don't make a lot of sense, the resulting coordinates stay the same. */
2343         if (flags & WINED3DTTFF_PROJECTED && sampler_type != WINED3DSTT_CUBE) {
2344             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
2345             switch (flags & ~WINED3DTTFF_PROJECTED) {
2346                 case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
2347                 case WINED3DTTFF_COUNT2: mask = WINED3DSP_WRITEMASK_1; break;
2348                 case WINED3DTTFF_COUNT3: mask = WINED3DSP_WRITEMASK_2; break;
2349                 case WINED3DTTFF_COUNT4:
2350                 case WINED3DTTFF_DISABLE: mask = WINED3DSP_WRITEMASK_3; break;
2351             }
2352         }
2353     }
2354     else if (shader_version < WINED3DPS_VERSION(2,0))
2355     {
2356         DWORD src_mod = arg->src[0] & WINED3DSP_SRCMOD_MASK;
2357
2358         if (src_mod == WINED3DSPSM_DZ) {
2359             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
2360             mask = WINED3DSP_WRITEMASK_2;
2361         } else if (src_mod == WINED3DSPSM_DW) {
2362             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
2363             mask = WINED3DSP_WRITEMASK_3;
2364         }
2365     } else {
2366         if(arg->opcode_token & WINED3DSI_TEXLD_PROJECT) {
2367             /* ps 2.0 texldp instruction always divides by the fourth component. */
2368             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
2369             mask = WINED3DSP_WRITEMASK_3;
2370         }
2371     }
2372
2373     if(deviceImpl->stateBlock->textures[sampler_idx] &&
2374        IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
2375         sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
2376     }
2377
2378     shader_glsl_get_sample_function(sampler_type, sample_flags, &sample_function);
2379     mask |= sample_function.coord_mask;
2380
2381     if (shader_version < WINED3DPS_VERSION(2,0)) swizzle = WINED3DVS_NOSWIZZLE;
2382     else swizzle = arg->src[1] & WINED3DSP_SWIZZLE_MASK;
2383
2384     /* 1.0-1.3: Use destination register as coordinate source.
2385        1.4+: Use provided coordinate source register. */
2386     if (shader_version < WINED3DPS_VERSION(1,4))
2387     {
2388         char coord_mask[6];
2389         shader_glsl_get_write_mask(mask, coord_mask);
2390         shader_glsl_gen_sample_code(arg, sampler_idx, &sample_function, swizzle, NULL,
2391                 "T%u%s", sampler_idx, coord_mask);
2392     } else {
2393         glsl_src_param_t coord_param;
2394         shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], mask, &coord_param);
2395         if(arg->opcode_token & WINED3DSI_TEXLD_BIAS) {
2396             glsl_src_param_t bias;
2397             shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &bias);
2398             shader_glsl_gen_sample_code(arg, sampler_idx, &sample_function, swizzle, bias.param_str,
2399                     "%s", coord_param.param_str);
2400         } else {
2401             shader_glsl_gen_sample_code(arg, sampler_idx, &sample_function, swizzle, NULL,
2402                     "%s", coord_param.param_str);
2403         }
2404     }
2405 }
2406
2407 static void shader_glsl_texldl(const SHADER_OPCODE_ARG *arg)
2408 {
2409     IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*)arg->shader;
2410     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
2411     glsl_sample_function_t sample_function;
2412     glsl_src_param_t coord_param, lod_param;
2413     DWORD sample_flags = WINED3D_GLSL_SAMPLE_LOD;
2414     DWORD sampler_type;
2415     DWORD sampler_idx;
2416     DWORD swizzle = arg->src[1] & WINED3DSP_SWIZZLE_MASK;
2417
2418     sampler_idx = arg->src[1] & WINED3DSP_REGNUM_MASK;
2419     sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
2420     if(deviceImpl->stateBlock->textures[sampler_idx] &&
2421        IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
2422         sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
2423     }
2424     shader_glsl_get_sample_function(sampler_type, sample_flags, &sample_function);
2425     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], sample_function.coord_mask, &coord_param);
2426
2427     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &lod_param);
2428
2429     if (shader_is_pshader_version(arg->reg_maps->shader_version))
2430     {
2431         /* The GLSL spec claims the Lod sampling functions are only supported in vertex shaders.
2432          * However, they seem to work just fine in fragment shaders as well. */
2433         WARN("Using %s in fragment shader.\n", sample_function.name);
2434     }
2435     shader_glsl_gen_sample_code(arg, sampler_idx, &sample_function, swizzle, lod_param.param_str,
2436             "%s", coord_param.param_str);
2437 }
2438
2439 static void pshader_glsl_texcoord(const SHADER_OPCODE_ARG *arg)
2440 {
2441     /* FIXME: Make this work for more than just 2D textures */
2442     SHADER_BUFFER* buffer = arg->buffer;
2443     DWORD write_mask;
2444     char dst_mask[6];
2445
2446     write_mask = shader_glsl_append_dst(arg->buffer, arg);
2447     shader_glsl_get_write_mask(write_mask, dst_mask);
2448
2449     if (arg->reg_maps->shader_version != WINED3DPS_VERSION(1,4))
2450     {
2451         DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2452         shader_addline(buffer, "clamp(gl_TexCoord[%u], 0.0, 1.0)%s);\n", reg, dst_mask);
2453     } else {
2454         DWORD reg = arg->src[0] & WINED3DSP_REGNUM_MASK;
2455         DWORD src_mod = arg->src[0] & WINED3DSP_SRCMOD_MASK;
2456         char dst_swizzle[6];
2457
2458         shader_glsl_get_swizzle(arg->src[0], FALSE, write_mask, dst_swizzle);
2459
2460         if (src_mod == WINED3DSPSM_DZ) {
2461             glsl_src_param_t div_param;
2462             unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
2463             shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_2, &div_param);
2464
2465             if (mask_size > 1) {
2466                 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
2467             } else {
2468                 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
2469             }
2470         } else if (src_mod == WINED3DSPSM_DW) {
2471             glsl_src_param_t div_param;
2472             unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
2473             shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &div_param);
2474
2475             if (mask_size > 1) {
2476                 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
2477             } else {
2478                 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
2479             }
2480         } else {
2481             shader_addline(buffer, "gl_TexCoord[%u]%s);\n", reg, dst_swizzle);
2482         }
2483     }
2484 }
2485
2486 /** Process the WINED3DSIO_TEXDP3TEX instruction in GLSL:
2487  * Take a 3-component dot product of the TexCoord[dstreg] and src,
2488  * then perform a 1D texture lookup from stage dstregnum, place into dst. */
2489 static void pshader_glsl_texdp3tex(const SHADER_OPCODE_ARG *arg)
2490 {
2491     glsl_src_param_t src0_param;
2492     glsl_sample_function_t sample_function;
2493     DWORD sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2494     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2495     DWORD sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
2496     UINT mask_size;
2497
2498     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2499
2500     /* Do I have to take care about the projected bit? I don't think so, since the dp3 returns only one
2501      * scalar, and projected sampling would require 4.
2502      *
2503      * It is a dependent read - not valid with conditional NP2 textures
2504      */
2505     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
2506     mask_size = shader_glsl_get_write_mask_size(sample_function.coord_mask);
2507
2508     switch(mask_size)
2509     {
2510         case 1:
2511             shader_glsl_gen_sample_code(arg, sampler_idx, &sample_function, WINED3DVS_NOSWIZZLE, NULL,
2512                     "dot(gl_TexCoord[%u].xyz, %s)", sampler_idx, src0_param.param_str);
2513             break;
2514
2515         case 2:
2516             shader_glsl_gen_sample_code(arg, sampler_idx, &sample_function, WINED3DVS_NOSWIZZLE, NULL,
2517                     "vec2(dot(gl_TexCoord[%u].xyz, %s), 0.0)", sampler_idx, src0_param.param_str);
2518             break;
2519
2520         case 3:
2521             shader_glsl_gen_sample_code(arg, sampler_idx, &sample_function, WINED3DVS_NOSWIZZLE, NULL,
2522                     "vec3(dot(gl_TexCoord[%u].xyz, %s), 0.0, 0.0)", sampler_idx, src0_param.param_str);
2523             break;
2524
2525         default:
2526             FIXME("Unexpected mask size %u\n", mask_size);
2527             break;
2528     }
2529 }
2530
2531 /** Process the WINED3DSIO_TEXDP3 instruction in GLSL:
2532  * Take a 3-component dot product of the TexCoord[dstreg] and src. */
2533 static void pshader_glsl_texdp3(const SHADER_OPCODE_ARG *arg)
2534 {
2535     glsl_src_param_t src0_param;
2536     DWORD dstreg = arg->dst & WINED3DSP_REGNUM_MASK;
2537     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2538     DWORD dst_mask;
2539     unsigned int mask_size;
2540
2541     dst_mask = shader_glsl_append_dst(arg->buffer, arg);
2542     mask_size = shader_glsl_get_write_mask_size(dst_mask);
2543     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2544
2545     if (mask_size > 1) {
2546         shader_addline(arg->buffer, "vec%d(dot(T%u.xyz, %s)));\n", mask_size, dstreg, src0_param.param_str);
2547     } else {
2548         shader_addline(arg->buffer, "dot(T%u.xyz, %s));\n", dstreg, src0_param.param_str);
2549     }
2550 }
2551
2552 /** Process the WINED3DSIO_TEXDEPTH instruction in GLSL:
2553  * Calculate the depth as dst.x / dst.y   */
2554 static void pshader_glsl_texdepth(const SHADER_OPCODE_ARG *arg)
2555 {
2556     glsl_dst_param_t dst_param;
2557
2558     shader_glsl_add_dst_param(arg, arg->dst, 0, &dst_param);
2559
2560     /* Tests show that texdepth never returns anything below 0.0, and that r5.y is clamped to 1.0.
2561      * Negative input is accepted, -0.25 / -0.5 returns 0.5. GL should clamp gl_FragDepth to [0;1], but
2562      * this doesn't always work, so clamp the results manually. Whether or not the x value is clamped at 1
2563      * too is irrelevant, since if x = 0, any y value < 1.0 (and > 1.0 is not allowed) results in a result
2564      * >= 1.0 or < 0.0
2565      */
2566     shader_addline(arg->buffer, "gl_FragDepth = clamp((%s.x / min(%s.y, 1.0)), 0.0, 1.0);\n", dst_param.reg_name, dst_param.reg_name);
2567 }
2568
2569 /** Process the WINED3DSIO_TEXM3X2DEPTH instruction in GLSL:
2570  * Last row of a 3x2 matrix multiply, use the result to calculate the depth:
2571  * Calculate tmp0.y = TexCoord[dstreg] . src.xyz;  (tmp0.x has already been calculated)
2572  * depth = (tmp0.y == 0.0) ? 1.0 : tmp0.x / tmp0.y
2573  */
2574 static void pshader_glsl_texm3x2depth(const SHADER_OPCODE_ARG *arg)
2575 {
2576     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2577     DWORD dstreg = arg->dst & WINED3DSP_REGNUM_MASK;
2578     glsl_src_param_t src0_param;
2579
2580     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2581
2582     shader_addline(arg->buffer, "tmp0.y = dot(T%u.xyz, %s);\n", dstreg, src0_param.param_str);
2583     shader_addline(arg->buffer, "gl_FragDepth = (tmp0.y == 0.0) ? 1.0 : clamp(tmp0.x / tmp0.y, 0.0, 1.0);\n");
2584 }
2585
2586 /** Process the WINED3DSIO_TEXM3X2PAD instruction in GLSL
2587  * Calculate the 1st of a 2-row matrix multiplication. */
2588 static void pshader_glsl_texm3x2pad(const SHADER_OPCODE_ARG *arg)
2589 {
2590     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2591     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2592     SHADER_BUFFER* buffer = arg->buffer;
2593     glsl_src_param_t src0_param;
2594
2595     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2596     shader_addline(buffer, "tmp0.x = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
2597 }
2598
2599 /** Process the WINED3DSIO_TEXM3X3PAD instruction in GLSL
2600  * Calculate the 1st or 2nd row of a 3-row matrix multiplication. */
2601 static void pshader_glsl_texm3x3pad(const SHADER_OPCODE_ARG* arg)
2602 {
2603     IWineD3DPixelShaderImpl* shader = (IWineD3DPixelShaderImpl*) arg->shader;
2604     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2605     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2606     SHADER_BUFFER* buffer = arg->buffer;
2607     SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
2608     glsl_src_param_t src0_param;
2609
2610     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2611     shader_addline(buffer, "tmp0.%c = dot(T%u.xyz, %s);\n", 'x' + current_state->current_row, reg, src0_param.param_str);
2612     current_state->texcoord_w[current_state->current_row++] = reg;
2613 }
2614
2615 static void pshader_glsl_texm3x2tex(const SHADER_OPCODE_ARG *arg)
2616 {
2617     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2618     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2619     SHADER_BUFFER* buffer = arg->buffer;
2620     glsl_src_param_t src0_param;
2621     DWORD sampler_type = arg->reg_maps->samplers[reg] & WINED3DSP_TEXTURETYPE_MASK;
2622     glsl_sample_function_t sample_function;
2623
2624     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2625     shader_addline(buffer, "tmp0.y = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
2626
2627     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
2628
2629     /* Sample the texture using the calculated coordinates */
2630     shader_glsl_gen_sample_code(arg, reg, &sample_function, WINED3DVS_NOSWIZZLE, NULL, "tmp0.xy");
2631 }
2632
2633 /** Process the WINED3DSIO_TEXM3X3TEX instruction in GLSL
2634  * Perform the 3rd row of a 3x3 matrix multiply, then sample the texture using the calculated coordinates */
2635 static void pshader_glsl_texm3x3tex(const SHADER_OPCODE_ARG *arg)
2636 {
2637     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2638     glsl_src_param_t src0_param;
2639     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2640     IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
2641     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
2642     DWORD sampler_type = arg->reg_maps->samplers[reg] & WINED3DSP_TEXTURETYPE_MASK;
2643     glsl_sample_function_t sample_function;
2644
2645     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2646     shader_addline(arg->buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
2647
2648     /* Dependent read, not valid with conditional NP2 */
2649     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
2650
2651     /* Sample the texture using the calculated coordinates */
2652     shader_glsl_gen_sample_code(arg, reg, &sample_function, WINED3DVS_NOSWIZZLE, NULL, "tmp0.xyz");
2653
2654     current_state->current_row = 0;
2655 }
2656
2657 /** Process the WINED3DSIO_TEXM3X3 instruction in GLSL
2658  * Perform the 3rd row of a 3x3 matrix multiply */
2659 static void pshader_glsl_texm3x3(const SHADER_OPCODE_ARG *arg)
2660 {
2661     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2662     glsl_src_param_t src0_param;
2663     char dst_mask[6];
2664     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2665     IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
2666     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
2667
2668     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2669
2670     shader_glsl_append_dst(arg->buffer, arg);
2671     shader_glsl_get_write_mask(arg->dst, dst_mask);
2672     shader_addline(arg->buffer, "vec4(tmp0.xy, dot(T%u.xyz, %s), 1.0)%s);\n", reg, src0_param.param_str, dst_mask);
2673
2674     current_state->current_row = 0;
2675 }
2676
2677 /** Process the WINED3DSIO_TEXM3X3SPEC instruction in GLSL 
2678  * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
2679 static void pshader_glsl_texm3x3spec(const SHADER_OPCODE_ARG *arg)
2680 {
2681     IWineD3DPixelShaderImpl* shader = (IWineD3DPixelShaderImpl*) arg->shader;
2682     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2683     glsl_src_param_t src0_param;
2684     glsl_src_param_t src1_param;
2685     SHADER_BUFFER* buffer = arg->buffer;
2686     SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
2687     DWORD stype = arg->reg_maps->samplers[reg] & WINED3DSP_TEXTURETYPE_MASK;
2688     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2689     glsl_sample_function_t sample_function;
2690
2691     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2692     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], src_mask, &src1_param);
2693
2694     /* Perform the last matrix multiply operation */
2695     shader_addline(buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
2696     /* Reflection calculation */
2697     shader_addline(buffer, "tmp0.xyz = -reflect((%s), normalize(tmp0.xyz));\n", src1_param.param_str);
2698
2699     /* Dependent read, not valid with conditional NP2 */
2700     shader_glsl_get_sample_function(stype, 0, &sample_function);
2701
2702     /* Sample the texture */
2703     shader_glsl_gen_sample_code(arg, reg, &sample_function, WINED3DVS_NOSWIZZLE, NULL, "tmp0.xyz");
2704
2705     current_state->current_row = 0;
2706 }
2707
2708 /** Process the WINED3DSIO_TEXM3X3VSPEC instruction in GLSL 
2709  * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
2710 static void pshader_glsl_texm3x3vspec(const SHADER_OPCODE_ARG *arg)
2711 {
2712     IWineD3DPixelShaderImpl* shader = (IWineD3DPixelShaderImpl*) arg->shader;
2713     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2714     SHADER_BUFFER* buffer = arg->buffer;
2715     SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
2716     glsl_src_param_t src0_param;
2717     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2718     DWORD sampler_type = arg->reg_maps->samplers[reg] & WINED3DSP_TEXTURETYPE_MASK;
2719     glsl_sample_function_t sample_function;
2720
2721     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2722
2723     /* Perform the last matrix multiply operation */
2724     shader_addline(buffer, "tmp0.z = dot(vec3(T%u), vec3(%s));\n", reg, src0_param.param_str);
2725
2726     /* Construct the eye-ray vector from w coordinates */
2727     shader_addline(buffer, "tmp1.xyz = normalize(vec3(gl_TexCoord[%u].w, gl_TexCoord[%u].w, gl_TexCoord[%u].w));\n",
2728             current_state->texcoord_w[0], current_state->texcoord_w[1], reg);
2729     shader_addline(buffer, "tmp0.xyz = -reflect(tmp1.xyz, normalize(tmp0.xyz));\n");
2730
2731     /* Dependent read, not valid with conditional NP2 */
2732     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
2733
2734     /* Sample the texture using the calculated coordinates */
2735     shader_glsl_gen_sample_code(arg, reg, &sample_function, WINED3DVS_NOSWIZZLE, NULL, "tmp0.xyz");
2736
2737     current_state->current_row = 0;
2738 }
2739
2740 /** Process the WINED3DSIO_TEXBEM instruction in GLSL.
2741  * Apply a fake bump map transform.
2742  * texbem is pshader <= 1.3 only, this saves a few version checks
2743  */
2744 static void pshader_glsl_texbem(const SHADER_OPCODE_ARG *arg)
2745 {
2746     IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
2747     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
2748     glsl_sample_function_t sample_function;
2749     glsl_src_param_t coord_param;
2750     DWORD sampler_type;
2751     DWORD sampler_idx;
2752     DWORD mask;
2753     DWORD flags;
2754     char coord_mask[6];
2755
2756     sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2757     flags = deviceImpl->stateBlock->textureState[sampler_idx][WINED3DTSS_TEXTURETRANSFORMFLAGS];
2758
2759     sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
2760     /* Dependent read, not valid with conditional NP2 */
2761     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
2762     mask = sample_function.coord_mask;
2763
2764     shader_glsl_get_write_mask(mask, coord_mask);
2765
2766     /* with projective textures, texbem only divides the static texture coord, not the displacement,
2767          * so we can't let the GL handle this.
2768          */
2769     if (flags & WINED3DTTFF_PROJECTED) {
2770         DWORD div_mask=0;
2771         char coord_div_mask[3];
2772         switch (flags & ~WINED3DTTFF_PROJECTED) {
2773             case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
2774             case WINED3DTTFF_COUNT2: div_mask = WINED3DSP_WRITEMASK_1; break;
2775             case WINED3DTTFF_COUNT3: div_mask = WINED3DSP_WRITEMASK_2; break;
2776             case WINED3DTTFF_COUNT4:
2777             case WINED3DTTFF_DISABLE: div_mask = WINED3DSP_WRITEMASK_3; break;
2778         }
2779         shader_glsl_get_write_mask(div_mask, coord_div_mask);
2780         shader_addline(arg->buffer, "T%u%s /= T%u%s;\n", sampler_idx, coord_mask, sampler_idx, coord_div_mask);
2781     }
2782
2783     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0|WINED3DSP_WRITEMASK_1, &coord_param);
2784
2785     shader_glsl_gen_sample_code(arg, sampler_idx, &sample_function, WINED3DVS_NOSWIZZLE, NULL,
2786             "T%u%s + vec4(bumpenvmat%d * %s, 0.0, 0.0)%s", sampler_idx, coord_mask, sampler_idx,
2787             coord_param.param_str, coord_mask);
2788
2789     if(arg->opcode->opcode == WINED3DSIO_TEXBEML) {
2790         glsl_src_param_t luminance_param;
2791         glsl_dst_param_t dst_param;
2792
2793         shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_2, &luminance_param);
2794         shader_glsl_add_dst_param(arg, arg->dst, arg->dst_addr, &dst_param);
2795
2796         shader_addline(arg->buffer, "%s%s *= (%s * luminancescale%d + luminanceoffset%d);\n",
2797                        dst_param.reg_name, dst_param.mask_str,
2798                        luminance_param.param_str, sampler_idx, sampler_idx);
2799     }
2800 }
2801
2802 static void pshader_glsl_bem(const SHADER_OPCODE_ARG *arg)
2803 {
2804     glsl_src_param_t src0_param, src1_param;
2805     DWORD sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2806
2807     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0|WINED3DSP_WRITEMASK_1, &src0_param);
2808     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0|WINED3DSP_WRITEMASK_1, &src1_param);
2809
2810     shader_glsl_append_dst(arg->buffer, arg);
2811     shader_addline(arg->buffer, "%s + bumpenvmat%d * %s);\n",
2812                    src0_param.param_str, sampler_idx, src1_param.param_str);
2813 }
2814
2815 /** Process the WINED3DSIO_TEXREG2AR instruction in GLSL
2816  * Sample 2D texture at dst using the alpha & red (wx) components of src as texture coordinates */
2817 static void pshader_glsl_texreg2ar(const SHADER_OPCODE_ARG *arg)
2818 {
2819     glsl_src_param_t src0_param;
2820     DWORD sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2821     DWORD sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
2822     glsl_sample_function_t sample_function;
2823
2824     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
2825
2826     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
2827     shader_glsl_gen_sample_code(arg, sampler_idx, &sample_function, WINED3DVS_NOSWIZZLE, NULL,
2828             "%s.wx", src0_param.reg_name);
2829 }
2830
2831 /** Process the WINED3DSIO_TEXREG2GB instruction in GLSL
2832  * Sample 2D texture at dst using the green & blue (yz) components of src as texture coordinates */
2833 static void pshader_glsl_texreg2gb(const SHADER_OPCODE_ARG *arg)
2834 {
2835     glsl_src_param_t src0_param;
2836     DWORD sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2837     DWORD sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
2838     glsl_sample_function_t sample_function;
2839
2840     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
2841
2842     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
2843     shader_glsl_gen_sample_code(arg, sampler_idx, &sample_function, WINED3DVS_NOSWIZZLE, NULL,
2844             "%s.yz", src0_param.reg_name);
2845 }
2846
2847 /** Process the WINED3DSIO_TEXREG2RGB instruction in GLSL
2848  * Sample texture at dst using the rgb (xyz) components of src as texture coordinates */
2849 static void pshader_glsl_texreg2rgb(const SHADER_OPCODE_ARG *arg)
2850 {
2851     glsl_src_param_t src0_param;
2852     DWORD sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2853     DWORD sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
2854     glsl_sample_function_t sample_function;
2855
2856     /* Dependent read, not valid with conditional NP2 */
2857     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
2858     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], sample_function.coord_mask, &src0_param);
2859
2860     shader_glsl_gen_sample_code(arg, sampler_idx, &sample_function, WINED3DVS_NOSWIZZLE, NULL,
2861             "%s", src0_param.param_str);
2862 }
2863
2864 /** Process the WINED3DSIO_TEXKILL instruction in GLSL.
2865  * If any of the first 3 components are < 0, discard this pixel */
2866 static void pshader_glsl_texkill(const SHADER_OPCODE_ARG *arg)
2867 {
2868     glsl_dst_param_t dst_param;
2869
2870     /* The argument is a destination parameter, and no writemasks are allowed */
2871     shader_glsl_add_dst_param(arg, arg->dst, 0, &dst_param);
2872     if ((arg->reg_maps->shader_version >= WINED3DPS_VERSION(2,0)))
2873     {
2874         /* 2.0 shaders compare all 4 components in texkill */
2875         shader_addline(arg->buffer, "if (any(lessThan(%s.xyzw, vec4(0.0)))) discard;\n", dst_param.reg_name);
2876     } else {
2877         /* 1.X shaders only compare the first 3 components, probably due to the nature of the texkill
2878          * instruction as a tex* instruction, and phase, which kills all a / w components. Even if all
2879          * 4 components are defined, only the first 3 are used
2880          */
2881         shader_addline(arg->buffer, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;\n", dst_param.reg_name);
2882     }
2883 }
2884
2885 /** Process the WINED3DSIO_DP2ADD instruction in GLSL.
2886  * dst = dot2(src0, src1) + src2 */
2887 static void pshader_glsl_dp2add(const SHADER_OPCODE_ARG *arg)
2888 {
2889     glsl_src_param_t src0_param;
2890     glsl_src_param_t src1_param;
2891     glsl_src_param_t src2_param;
2892     DWORD write_mask;
2893     unsigned int mask_size;
2894
2895     write_mask = shader_glsl_append_dst(arg->buffer, arg);
2896     mask_size = shader_glsl_get_write_mask_size(write_mask);
2897
2898     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
2899     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
2900     shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], WINED3DSP_WRITEMASK_0, &src2_param);
2901
2902     if (mask_size > 1) {
2903         shader_addline(arg->buffer, "vec%d(dot(%s, %s) + %s));\n", mask_size, src0_param.param_str, src1_param.param_str, src2_param.param_str);
2904     } else {
2905         shader_addline(arg->buffer, "dot(%s, %s) + %s);\n", src0_param.param_str, src1_param.param_str, src2_param.param_str);
2906     }
2907 }
2908
2909 static void pshader_glsl_input_pack(SHADER_BUFFER* buffer, const struct semantic* semantics_in,
2910         IWineD3DPixelShader *iface, enum vertexprocessing_mode vertexprocessing)
2911 {
2912    unsigned int i;
2913    IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *) iface;
2914
2915    for (i = 0; i < MAX_REG_INPUT; i++) {
2916
2917        DWORD usage_token = semantics_in[i].usage;
2918        DWORD register_token = semantics_in[i].reg;
2919        DWORD usage, usage_idx;
2920        char reg_mask[6];
2921
2922        /* Uninitialized */
2923        if (!usage_token) continue;
2924        usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
2925        usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
2926        shader_glsl_get_write_mask(register_token, reg_mask);
2927
2928        switch(usage) {
2929
2930            case WINED3DDECLUSAGE_TEXCOORD:
2931                if(usage_idx < 8 && vertexprocessing == pretransformed) {
2932                    shader_addline(buffer, "IN[%u]%s = gl_TexCoord[%u]%s;\n",
2933                                   This->input_reg_map[i], reg_mask, usage_idx, reg_mask);
2934                } else {
2935                    shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
2936                                   This->input_reg_map[i], reg_mask, reg_mask);
2937                }
2938                break;
2939
2940            case WINED3DDECLUSAGE_COLOR:
2941                if (usage_idx == 0)
2942                    shader_addline(buffer, "IN[%u]%s = vec4(gl_Color)%s;\n",
2943                        This->input_reg_map[i], reg_mask, reg_mask);
2944                else if (usage_idx == 1)
2945                    shader_addline(buffer, "IN[%u]%s = vec4(gl_SecondaryColor)%s;\n",
2946                        This->input_reg_map[i], reg_mask, reg_mask);
2947                else
2948                    shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
2949                        This->input_reg_map[i], reg_mask, reg_mask);
2950                break;
2951
2952            default:
2953                shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
2954                    This->input_reg_map[i], reg_mask, reg_mask);
2955         }
2956     }
2957 }
2958
2959 /*********************************************
2960  * Vertex Shader Specific Code begins here
2961  ********************************************/
2962
2963 static void add_glsl_program_entry(struct shader_glsl_priv *priv, struct glsl_shader_prog_link *entry) {
2964     glsl_program_key_t *key;
2965
2966     key = HeapAlloc(GetProcessHeap(), 0, sizeof(glsl_program_key_t));
2967     key->vshader = entry->vshader;
2968     key->pshader = entry->pshader;
2969     key->vs_args = entry->vs_args;
2970     key->ps_args = entry->ps_args;
2971
2972     hash_table_put(priv->glsl_program_lookup, key, entry);
2973 }
2974
2975 static struct glsl_shader_prog_link *get_glsl_program_entry(struct shader_glsl_priv *priv,
2976         IWineD3DVertexShader *vshader, IWineD3DPixelShader *pshader, struct vs_compile_args *vs_args,
2977         struct ps_compile_args *ps_args) {
2978     glsl_program_key_t key;
2979
2980     key.vshader = vshader;
2981     key.pshader = pshader;
2982     key.vs_args = *vs_args;
2983     key.ps_args = *ps_args;
2984
2985     return hash_table_get(priv->glsl_program_lookup, &key);
2986 }
2987
2988 static void delete_glsl_program_entry(struct shader_glsl_priv *priv, const WineD3D_GL_Info *gl_info,
2989         struct glsl_shader_prog_link *entry)
2990 {
2991     glsl_program_key_t *key;
2992
2993     key = HeapAlloc(GetProcessHeap(), 0, sizeof(glsl_program_key_t));
2994     key->vshader = entry->vshader;
2995     key->pshader = entry->pshader;
2996     key->vs_args = entry->vs_args;
2997     key->ps_args = entry->ps_args;
2998     hash_table_remove(priv->glsl_program_lookup, key);
2999
3000     GL_EXTCALL(glDeleteObjectARB(entry->programId));
3001     if (entry->vshader) list_remove(&entry->vshader_entry);
3002     if (entry->pshader) list_remove(&entry->pshader_entry);
3003     HeapFree(GetProcessHeap(), 0, entry->vuniformF_locations);
3004     HeapFree(GetProcessHeap(), 0, entry->puniformF_locations);
3005     HeapFree(GetProcessHeap(), 0, entry);
3006 }
3007
3008 static void handle_ps3_input(SHADER_BUFFER *buffer, const struct semantic *semantics_in,
3009         const struct semantic *semantics_out, const WineD3D_GL_Info *gl_info, const DWORD *map)
3010 {
3011     unsigned int i, j;
3012     DWORD usage_token, usage_token_out;
3013     DWORD register_token, register_token_out;
3014     DWORD usage, usage_idx, usage_out, usage_idx_out;
3015     DWORD *set;
3016     DWORD in_idx;
3017     DWORD in_count = GL_LIMITS(glsl_varyings) / 4;
3018     char reg_mask[6], reg_mask_out[6];
3019     char destination[50];
3020
3021     set = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*set) * (in_count + 2));
3022
3023     if (!semantics_out) {
3024         /* Save gl_FrontColor & gl_FrontSecondaryColor before overwriting them. */
3025         shader_addline(buffer, "vec4 front_color = gl_FrontColor;\n");
3026         shader_addline(buffer, "vec4 front_secondary_color = gl_FrontSecondaryColor;\n");
3027     }
3028
3029     for(i = 0; i < MAX_REG_INPUT; i++) {
3030         usage_token = semantics_in[i].usage;
3031         if (!usage_token) continue;
3032
3033         in_idx = map[i];
3034         if (in_idx >= (in_count + 2)) {
3035             FIXME("More input varyings declared than supported, expect issues\n");
3036             continue;
3037         } else if(map[i] == -1) {
3038             /* Declared, but not read register */
3039             continue;
3040         }
3041
3042         if (in_idx == in_count) {
3043             sprintf(destination, "gl_FrontColor");
3044         } else if (in_idx == in_count + 1) {
3045             sprintf(destination, "gl_FrontSecondaryColor");
3046         } else {
3047             sprintf(destination, "IN[%u]", in_idx);
3048         }
3049
3050         register_token = semantics_in[i].reg;
3051
3052         usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
3053         usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
3054         set[map[i]] = shader_glsl_get_write_mask(register_token, reg_mask);
3055
3056         if(!semantics_out) {
3057             switch(usage) {
3058                 case WINED3DDECLUSAGE_COLOR:
3059                     if (usage_idx == 0)
3060                         shader_addline(buffer, "%s%s = front_color%s;\n",
3061                                        destination, reg_mask, reg_mask);
3062                     else if (usage_idx == 1)
3063                         shader_addline(buffer, "%s%s = front_secondary_color%s;\n",
3064                                        destination, reg_mask, reg_mask);
3065                     else
3066                         shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3067                                        destination, reg_mask, reg_mask);
3068                     break;
3069
3070                 case WINED3DDECLUSAGE_TEXCOORD:
3071                     if (usage_idx < 8) {
3072                         shader_addline(buffer, "%s%s = gl_TexCoord[%u]%s;\n",
3073                                        destination, reg_mask, usage_idx, reg_mask);
3074                     } else {
3075                         shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3076                                        destination, reg_mask, reg_mask);
3077                     }
3078                     break;
3079
3080                 case WINED3DDECLUSAGE_FOG:
3081                     shader_addline(buffer, "%s%s = vec4(gl_FogFragCoord, 0.0, 0.0, 0.0)%s;\n",
3082                                    destination, reg_mask, reg_mask);
3083                     break;
3084
3085                 default:
3086                     shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3087                                    destination, reg_mask, reg_mask);
3088             }
3089         } else {
3090             BOOL found = FALSE;
3091             for(j = 0; j < MAX_REG_OUTPUT; j++) {
3092                 usage_token_out = semantics_out[j].usage;
3093                 if (!usage_token_out) continue;
3094                 register_token_out = semantics_out[j].reg;
3095
3096                 usage_out = (usage_token_out & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
3097                 usage_idx_out = (usage_token_out & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
3098                 shader_glsl_get_write_mask(register_token_out, reg_mask_out);
3099
3100                 if(usage == usage_out &&
3101                    usage_idx == usage_idx_out) {
3102                     shader_addline(buffer, "%s%s = OUT[%u]%s;\n",
3103                                    destination, reg_mask, j, reg_mask);
3104                     found = TRUE;
3105                 }
3106             }
3107             if(!found) {
3108                 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3109                                destination, reg_mask, reg_mask);
3110             }
3111         }
3112     }
3113
3114     /* This is solely to make the compiler / linker happy and avoid warning about undefined
3115      * varyings. It shouldn't result in any real code executed on the GPU, since all read
3116      * input varyings are assigned above, if the optimizer works properly.
3117      */
3118     for(i = 0; i < in_count + 2; i++) {
3119         if(set[i] != WINED3DSP_WRITEMASK_ALL) {
3120             unsigned int size = 0;
3121             memset(reg_mask, 0, sizeof(reg_mask));
3122             if(!(set[i] & WINED3DSP_WRITEMASK_0)) {
3123                 reg_mask[size] = 'x';
3124                 size++;
3125             }
3126             if(!(set[i] & WINED3DSP_WRITEMASK_1)) {
3127                 reg_mask[size] = 'y';
3128                 size++;
3129             }
3130             if(!(set[i] & WINED3DSP_WRITEMASK_2)) {
3131                 reg_mask[size] = 'z';
3132                 size++;
3133             }
3134             if(!(set[i] & WINED3DSP_WRITEMASK_3)) {
3135                 reg_mask[size] = 'w';
3136                 size++;
3137             }
3138
3139             if (i == in_count) {
3140                 sprintf(destination, "gl_FrontColor");
3141             } else if (i == in_count + 1) {
3142                 sprintf(destination, "gl_FrontSecondaryColor");
3143             } else {
3144                 sprintf(destination, "IN[%u]", i);
3145             }
3146
3147             if (size == 1) {
3148                 shader_addline(buffer, "%s.%s = 0.0;\n", destination, reg_mask);
3149             } else {
3150                 shader_addline(buffer, "%s.%s = vec%u(0.0);\n", destination, reg_mask, size);
3151             }
3152         }
3153     }
3154
3155     HeapFree(GetProcessHeap(), 0, set);
3156 }
3157
3158 static GLhandleARB generate_param_reorder_function(IWineD3DVertexShader *vertexshader,
3159         IWineD3DPixelShader *pixelshader, const WineD3D_GL_Info *gl_info)
3160 {
3161     GLhandleARB ret = 0;
3162     IWineD3DVertexShaderImpl *vs = (IWineD3DVertexShaderImpl *) vertexshader;
3163     IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) pixelshader;
3164     IWineD3DDeviceImpl *device;
3165     DWORD vs_major = WINED3DSHADER_VERSION_MAJOR(vs->baseShader.reg_maps.shader_version);
3166     DWORD ps_major = ps ? WINED3DSHADER_VERSION_MAJOR(ps->baseShader.reg_maps.shader_version) : 0;
3167     unsigned int i;
3168     SHADER_BUFFER buffer;
3169     DWORD usage_token;
3170     DWORD register_token;
3171     DWORD usage, usage_idx, writemask;
3172     char reg_mask[6];
3173     const struct semantic *semantics_out, *semantics_in;
3174
3175     shader_buffer_init(&buffer);
3176
3177     shader_addline(&buffer, "#version 120\n");
3178
3179     if(vs_major < 3 && ps_major < 3) {
3180         /* That one is easy: The vertex shader writes to the builtin varyings, the pixel shader reads from them.
3181          * Take care about the texcoord .w fixup though if we're using the fixed function fragment pipeline
3182          */
3183         device = (IWineD3DDeviceImpl *) vs->baseShader.device;
3184         if((GLINFO_LOCATION).set_texcoord_w && ps_major == 0 && vs_major > 0 &&
3185             !device->frag_pipe->ffp_proj_control) {
3186             shader_addline(&buffer, "void order_ps_input() {\n");
3187             for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) {
3188                 if(vs->baseShader.reg_maps.texcoord_mask[i] != 0 &&
3189                    vs->baseShader.reg_maps.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) {
3190                     shader_addline(&buffer, "gl_TexCoord[%u].w = 1.0;\n", i);
3191                 }
3192             }
3193             shader_addline(&buffer, "}\n");
3194         } else {
3195             shader_addline(&buffer, "void order_ps_input() { /* do nothing */ }\n");
3196         }
3197     } else if(ps_major < 3 && vs_major >= 3) {
3198         /* The vertex shader writes to its own varyings, the pixel shader needs them in the builtin ones */
3199         semantics_out = vs->semantics_out;
3200
3201         shader_addline(&buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
3202         for(i = 0; i < MAX_REG_OUTPUT; i++) {
3203             usage_token = semantics_out[i].usage;
3204             if (!usage_token) continue;
3205             register_token = semantics_out[i].reg;
3206
3207             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
3208             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
3209             writemask = shader_glsl_get_write_mask(register_token, reg_mask);
3210
3211             switch(usage) {
3212                 case WINED3DDECLUSAGE_COLOR:
3213                     if (usage_idx == 0)
3214                         shader_addline(&buffer, "gl_FrontColor%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
3215                     else if (usage_idx == 1)
3216                         shader_addline(&buffer, "gl_FrontSecondaryColor%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
3217                     break;
3218
3219                 case WINED3DDECLUSAGE_POSITION:
3220                     shader_addline(&buffer, "gl_Position%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
3221                     break;
3222
3223                 case WINED3DDECLUSAGE_TEXCOORD:
3224                     if (usage_idx < 8) {
3225                         if(!(GLINFO_LOCATION).set_texcoord_w || ps_major > 0) writemask |= WINED3DSP_WRITEMASK_3;
3226
3227                         shader_addline(&buffer, "gl_TexCoord[%u]%s = OUT[%u]%s;\n",
3228                                         usage_idx, reg_mask, i, reg_mask);
3229                         if(!(writemask & WINED3DSP_WRITEMASK_3)) {
3230                             shader_addline(&buffer, "gl_TexCoord[%u].w = 1.0;\n", usage_idx);
3231                         }
3232                     }
3233                     break;
3234
3235                 case WINED3DDECLUSAGE_PSIZE:
3236                     shader_addline(&buffer, "gl_PointSize = OUT[%u].x;\n", i);
3237                     break;
3238
3239                 case WINED3DDECLUSAGE_FOG:
3240                     shader_addline(&buffer, "gl_FogFragCoord = OUT[%u].%c;\n", i, reg_mask[1]);
3241                     break;
3242
3243                 default:
3244                     break;
3245             }
3246         }
3247         shader_addline(&buffer, "}\n");
3248
3249     } else if(ps_major >= 3 && vs_major >= 3) {
3250         semantics_out = vs->semantics_out;
3251         semantics_in = ps->semantics_in;
3252
3253         /* This one is tricky: a 3.0 pixel shader reads from a 3.0 vertex shader */
3254         shader_addline(&buffer, "varying vec4 IN[%u];\n", GL_LIMITS(glsl_varyings) / 4);
3255         shader_addline(&buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
3256
3257         /* First, sort out position and point size. Those are not passed to the pixel shader */
3258         for(i = 0; i < MAX_REG_OUTPUT; i++) {
3259             usage_token = semantics_out[i].usage;
3260             if (!usage_token) continue;
3261             register_token = semantics_out[i].reg;
3262
3263             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
3264             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
3265             shader_glsl_get_write_mask(register_token, reg_mask);
3266
3267             switch(usage) {
3268                 case WINED3DDECLUSAGE_POSITION:
3269                     shader_addline(&buffer, "gl_Position%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
3270                     break;
3271
3272                 case WINED3DDECLUSAGE_PSIZE:
3273                     shader_addline(&buffer, "gl_PointSize = OUT[%u].x;\n", i);
3274                     break;
3275
3276                 default:
3277                     break;
3278             }
3279         }
3280
3281         /* Then, fix the pixel shader input */
3282         handle_ps3_input(&buffer, semantics_in, semantics_out, gl_info, ps->input_reg_map);
3283
3284         shader_addline(&buffer, "}\n");
3285     } else if(ps_major >= 3 && vs_major < 3) {
3286         semantics_in = ps->semantics_in;
3287
3288         shader_addline(&buffer, "varying vec4 IN[%u];\n", GL_LIMITS(glsl_varyings) / 4);
3289         shader_addline(&buffer, "void order_ps_input() {\n");
3290         /* The vertex shader wrote to the builtin varyings. There is no need to figure out position and
3291          * point size, but we depend on the optimizers kindness to find out that the pixel shader doesn't
3292          * read gl_TexCoord and gl_ColorX, otherwise we'll run out of varyings
3293          */
3294         handle_ps3_input(&buffer, semantics_in, NULL, gl_info, ps->input_reg_map);
3295         shader_addline(&buffer, "}\n");
3296     } else {
3297         ERR("Unexpected vertex and pixel shader version condition: vs: %d, ps: %d\n", vs_major, ps_major);
3298     }
3299
3300     ret = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
3301     checkGLcall("glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)");
3302     GL_EXTCALL(glShaderSourceARB(ret, 1, (const char**)&buffer.buffer, NULL));
3303     checkGLcall("glShaderSourceARB(ret, 1, &buffer.buffer, NULL)");
3304     GL_EXTCALL(glCompileShaderARB(ret));
3305     checkGLcall("glCompileShaderARB(ret)");
3306
3307     shader_buffer_free(&buffer);
3308     return ret;
3309 }
3310
3311 static void hardcode_local_constants(IWineD3DBaseShaderImpl *shader, const WineD3D_GL_Info *gl_info,
3312         GLhandleARB programId, char prefix)
3313 {
3314     const local_constant *lconst;
3315     GLuint tmp_loc;
3316     const float *value;
3317     char glsl_name[8];
3318
3319     LIST_FOR_EACH_ENTRY(lconst, &shader->baseShader.constantsF, local_constant, entry) {
3320         value = (const float *)lconst->value;
3321         snprintf(glsl_name, sizeof(glsl_name), "%cLC%u", prefix, lconst->idx);
3322         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
3323         GL_EXTCALL(glUniform4fvARB(tmp_loc, 1, value));
3324     }
3325     checkGLcall("Hardcoding local constants\n");
3326 }
3327
3328 /** Sets the GLSL program ID for the given pixel and vertex shader combination.
3329  * It sets the programId on the current StateBlock (because it should be called
3330  * inside of the DrawPrimitive() part of the render loop).
3331  *
3332  * If a program for the given combination does not exist, create one, and store
3333  * the program in the hash table.  If it creates a program, it will link the
3334  * given objects, too.
3335  */
3336 static void set_glsl_shader_program(IWineD3DDevice *iface, BOOL use_ps, BOOL use_vs) {
3337     IWineD3DDeviceImpl *This               = (IWineD3DDeviceImpl *)iface;
3338     struct shader_glsl_priv *priv          = This->shader_priv;
3339     const WineD3D_GL_Info *gl_info         = &This->adapter->gl_info;
3340     IWineD3DPixelShader  *pshader          = This->stateBlock->pixelShader;
3341     IWineD3DVertexShader *vshader          = This->stateBlock->vertexShader;
3342     struct glsl_shader_prog_link *entry    = NULL;
3343     GLhandleARB programId                  = 0;
3344     GLhandleARB reorder_shader_id          = 0;
3345     int i;
3346     char glsl_name[8];
3347     GLhandleARB vshader_id, pshader_id;
3348     struct ps_compile_args ps_compile_args;
3349     struct vs_compile_args vs_compile_args;
3350
3351     if(use_vs) {
3352         find_vs_compile_args((IWineD3DVertexShaderImpl*)This->stateBlock->vertexShader, This->stateBlock, &vs_compile_args);
3353     } else {
3354         /* FIXME: Do we really have to spend CPU cycles to generate a few zeroed bytes? */
3355         memset(&vs_compile_args, 0, sizeof(vs_compile_args));
3356     }
3357     if(use_ps) {
3358         find_ps_compile_args((IWineD3DPixelShaderImpl*)This->stateBlock->pixelShader, This->stateBlock, &ps_compile_args);
3359     } else {
3360         /* FIXME: Do we really have to spend CPU cycles to generate a few zeroed bytes? */
3361         memset(&ps_compile_args, 0, sizeof(ps_compile_args));
3362     }
3363     entry = get_glsl_program_entry(priv, vshader, pshader, &vs_compile_args, &ps_compile_args);
3364     if (entry) {
3365         priv->glsl_program = entry;
3366         return;
3367     }
3368
3369     /* If we get to this point, then no matching program exists, so we create one */
3370     programId = GL_EXTCALL(glCreateProgramObjectARB());
3371     TRACE("Created new GLSL shader program %u\n", programId);
3372
3373     /* Create the entry */
3374     entry = HeapAlloc(GetProcessHeap(), 0, sizeof(struct glsl_shader_prog_link));
3375     entry->programId = programId;
3376     entry->vshader = vshader;
3377     entry->pshader = pshader;
3378     entry->vs_args = vs_compile_args;
3379     entry->ps_args = ps_compile_args;
3380     entry->constant_version = 0;
3381     /* Add the hash table entry */
3382     add_glsl_program_entry(priv, entry);
3383
3384     /* Set the current program */
3385     priv->glsl_program = entry;
3386
3387     if(use_vs) {
3388         vshader_id = find_gl_vshader((IWineD3DVertexShaderImpl *) vshader, &vs_compile_args);
3389     } else {
3390         vshader_id = 0;
3391     }
3392
3393     /* Attach GLSL vshader */
3394     if (vshader_id) {
3395         int max_attribs = 16;   /* TODO: Will this always be the case? It is at the moment... */
3396         char tmp_name[10];
3397
3398         reorder_shader_id = generate_param_reorder_function(vshader, pshader, gl_info);
3399         TRACE("Attaching GLSL shader object %u to program %u\n", reorder_shader_id, programId);
3400         GL_EXTCALL(glAttachObjectARB(programId, reorder_shader_id));
3401         checkGLcall("glAttachObjectARB");
3402         /* Flag the reorder function for deletion, then it will be freed automatically when the program
3403          * is destroyed
3404          */
3405         GL_EXTCALL(glDeleteObjectARB(reorder_shader_id));
3406
3407         TRACE("Attaching GLSL shader object %u to program %u\n", vshader_id, programId);
3408         GL_EXTCALL(glAttachObjectARB(programId, vshader_id));
3409         checkGLcall("glAttachObjectARB");
3410
3411         /* Bind vertex attributes to a corresponding index number to match
3412          * the same index numbers as ARB_vertex_programs (makes loading
3413          * vertex attributes simpler).  With this method, we can use the
3414          * exact same code to load the attributes later for both ARB and
3415          * GLSL shaders.
3416          *
3417          * We have to do this here because we need to know the Program ID
3418          * in order to make the bindings work, and it has to be done prior
3419          * to linking the GLSL program. */
3420         for (i = 0; i < max_attribs; ++i) {
3421             if (((IWineD3DBaseShaderImpl*)vshader)->baseShader.reg_maps.attributes[i]) {
3422                 snprintf(tmp_name, sizeof(tmp_name), "attrib%i", i);
3423                 GL_EXTCALL(glBindAttribLocationARB(programId, i, tmp_name));
3424             }
3425         }
3426         checkGLcall("glBindAttribLocationARB");
3427
3428         list_add_head(&((IWineD3DBaseShaderImpl *)vshader)->baseShader.linked_programs, &entry->vshader_entry);
3429     }
3430
3431     if(use_ps) {
3432         pshader_id = find_gl_pshader((IWineD3DPixelShaderImpl *) pshader, &ps_compile_args);
3433     } else {
3434         pshader_id = 0;
3435     }
3436
3437     /* Attach GLSL pshader */
3438     if (pshader_id) {
3439         TRACE("Attaching GLSL shader object %u to program %u\n", pshader_id, programId);
3440         GL_EXTCALL(glAttachObjectARB(programId, pshader_id));
3441         checkGLcall("glAttachObjectARB");
3442
3443         list_add_head(&((IWineD3DBaseShaderImpl *)pshader)->baseShader.linked_programs, &entry->pshader_entry);
3444     }
3445
3446     /* Link the program */
3447     TRACE("Linking GLSL shader program %u\n", programId);
3448     GL_EXTCALL(glLinkProgramARB(programId));
3449     print_glsl_info_log(&GLINFO_LOCATION, programId);
3450
3451     entry->vuniformF_locations = HeapAlloc(GetProcessHeap(), 0, sizeof(GLhandleARB) * GL_LIMITS(vshader_constantsF));
3452     for (i = 0; i < GL_LIMITS(vshader_constantsF); ++i) {
3453         snprintf(glsl_name, sizeof(glsl_name), "VC[%i]", i);
3454         entry->vuniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
3455     }
3456     for (i = 0; i < MAX_CONST_I; ++i) {
3457         snprintf(glsl_name, sizeof(glsl_name), "VI[%i]", i);
3458         entry->vuniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
3459     }
3460     entry->puniformF_locations = HeapAlloc(GetProcessHeap(), 0, sizeof(GLhandleARB) * GL_LIMITS(pshader_constantsF));
3461     for (i = 0; i < GL_LIMITS(pshader_constantsF); ++i) {
3462         snprintf(glsl_name, sizeof(glsl_name), "PC[%i]", i);
3463         entry->puniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
3464     }
3465     for (i = 0; i < MAX_CONST_I; ++i) {
3466         snprintf(glsl_name, sizeof(glsl_name), "PI[%i]", i);
3467         entry->puniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
3468     }
3469
3470     if(pshader) {
3471         for(i = 0; i < ((IWineD3DPixelShaderImpl*)pshader)->numbumpenvmatconsts; i++) {
3472             char name[32];
3473             sprintf(name, "bumpenvmat%d", ((IWineD3DPixelShaderImpl*)pshader)->bumpenvmatconst[i].texunit);
3474             entry->bumpenvmat_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
3475             sprintf(name, "luminancescale%d", ((IWineD3DPixelShaderImpl*)pshader)->luminanceconst[i].texunit);
3476             entry->luminancescale_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
3477             sprintf(name, "luminanceoffset%d", ((IWineD3DPixelShaderImpl*)pshader)->luminanceconst[i].texunit);
3478             entry->luminanceoffset_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
3479         }
3480     }
3481
3482
3483     entry->posFixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup"));
3484     entry->ycorrection_location = GL_EXTCALL(glGetUniformLocationARB(programId, "ycorrection"));
3485     checkGLcall("Find glsl program uniform locations");
3486
3487     if (pshader
3488             && WINED3DSHADER_VERSION_MAJOR(((IWineD3DPixelShaderImpl *)pshader)->baseShader.reg_maps.shader_version) >= 3
3489             && ((IWineD3DPixelShaderImpl *)pshader)->declared_in_count > GL_LIMITS(glsl_varyings) / 4)
3490     {
3491         TRACE("Shader %d needs vertex color clamping disabled\n", programId);
3492         entry->vertex_color_clamp = GL_FALSE;
3493     } else {
3494         entry->vertex_color_clamp = GL_FIXED_ONLY_ARB;
3495     }
3496
3497     /* Set the shader to allow uniform loading on it */
3498     GL_EXTCALL(glUseProgramObjectARB(programId));
3499     checkGLcall("glUseProgramObjectARB(programId)");
3500
3501     /* Load the vertex and pixel samplers now. The function that finds the mappings makes sure
3502      * that it stays the same for each vertexshader-pixelshader pair(=linked glsl program). If
3503      * a pshader with fixed function pipeline is used there are no vertex samplers, and if a
3504      * vertex shader with fixed function pixel processing is used we make sure that the card
3505      * supports enough samplers to allow the max number of vertex samplers with all possible
3506      * fixed function fragment processing setups. So once the program is linked these samplers
3507      * won't change.
3508      */
3509     if(vshader_id) {
3510         /* Load vertex shader samplers */
3511         shader_glsl_load_vsamplers(gl_info, (IWineD3DStateBlock*)This->stateBlock, programId);
3512     }
3513     if(pshader_id) {
3514         /* Load pixel shader samplers */
3515         shader_glsl_load_psamplers(gl_info, (IWineD3DStateBlock*)This->stateBlock, programId);
3516     }
3517
3518     /* If the local constants do not have to be loaded with the environment constants,
3519      * load them now to have them hardcoded in the GLSL program. This saves some CPU cycles
3520      * later
3521      */
3522     if(pshader && !((IWineD3DPixelShaderImpl*)pshader)->baseShader.load_local_constsF) {
3523         hardcode_local_constants((IWineD3DBaseShaderImpl *) pshader, gl_info, programId, 'P');
3524     }
3525     if(vshader && !((IWineD3DVertexShaderImpl*)vshader)->baseShader.load_local_constsF) {
3526         hardcode_local_constants((IWineD3DBaseShaderImpl *) vshader, gl_info, programId, 'V');
3527     }
3528 }
3529
3530 static GLhandleARB create_glsl_blt_shader(const WineD3D_GL_Info *gl_info, enum tex_types tex_type)
3531 {
3532     GLhandleARB program_id;
3533     GLhandleARB vshader_id, pshader_id;
3534     static const char *blt_vshader[] =
3535     {
3536         "#version 120\n"
3537         "void main(void)\n"
3538         "{\n"
3539         "    gl_Position = gl_Vertex;\n"
3540         "    gl_FrontColor = vec4(1.0);\n"
3541         "    gl_TexCoord[0] = gl_MultiTexCoord0;\n"
3542         "}\n"
3543     };
3544
3545     static const char *blt_pshaders[tex_type_count] =
3546     {
3547         /* tex_1d */
3548         NULL,
3549         /* tex_2d */
3550         "#version 120\n"
3551         "uniform sampler2D sampler;\n"
3552         "void main(void)\n"
3553         "{\n"
3554         "    gl_FragDepth = texture2D(sampler, gl_TexCoord[0].xy).x;\n"
3555         "}\n",
3556         /* tex_3d */
3557         NULL,
3558         /* tex_cube */
3559         "#version 120\n"
3560         "uniform samplerCube sampler;\n"
3561         "void main(void)\n"
3562         "{\n"
3563         "    gl_FragDepth = textureCube(sampler, gl_TexCoord[0].xyz).x;\n"
3564         "}\n",
3565         /* tex_rect */
3566         "#version 120\n"
3567         "#extension GL_ARB_texture_rectangle : enable\n"
3568         "uniform sampler2DRect sampler;\n"
3569         "void main(void)\n"
3570         "{\n"
3571         "    gl_FragDepth = texture2DRect(sampler, gl_TexCoord[0].xy).x;\n"
3572         "}\n",
3573     };
3574
3575     if (!blt_pshaders[tex_type])
3576     {
3577         FIXME("tex_type %#x not supported\n", tex_type);
3578         tex_type = tex_2d;
3579     }
3580
3581     vshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
3582     GL_EXTCALL(glShaderSourceARB(vshader_id, 1, blt_vshader, NULL));
3583     GL_EXTCALL(glCompileShaderARB(vshader_id));
3584
3585     pshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
3586     GL_EXTCALL(glShaderSourceARB(pshader_id, 1, &blt_pshaders[tex_type], NULL));
3587     GL_EXTCALL(glCompileShaderARB(pshader_id));
3588
3589     program_id = GL_EXTCALL(glCreateProgramObjectARB());
3590     GL_EXTCALL(glAttachObjectARB(program_id, vshader_id));
3591     GL_EXTCALL(glAttachObjectARB(program_id, pshader_id));
3592     GL_EXTCALL(glLinkProgramARB(program_id));
3593
3594     print_glsl_info_log(&GLINFO_LOCATION, program_id);
3595
3596     /* Once linked we can mark the shaders for deletion. They will be deleted once the program
3597      * is destroyed
3598      */
3599     GL_EXTCALL(glDeleteObjectARB(vshader_id));
3600     GL_EXTCALL(glDeleteObjectARB(pshader_id));
3601     return program_id;
3602 }
3603
3604 static void shader_glsl_select(IWineD3DDevice *iface, BOOL usePS, BOOL useVS) {
3605     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
3606     struct shader_glsl_priv *priv = This->shader_priv;
3607     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
3608     GLhandleARB program_id = 0;
3609     GLenum old_vertex_color_clamp, current_vertex_color_clamp;
3610
3611     old_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
3612
3613     if (useVS || usePS) set_glsl_shader_program(iface, usePS, useVS);
3614     else priv->glsl_program = NULL;
3615
3616     current_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
3617
3618     if (old_vertex_color_clamp != current_vertex_color_clamp) {
3619         if (GL_SUPPORT(ARB_COLOR_BUFFER_FLOAT)) {
3620             GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, current_vertex_color_clamp));
3621             checkGLcall("glClampColorARB");
3622         } else {
3623             FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
3624         }
3625     }
3626
3627     program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
3628     if (program_id) TRACE("Using GLSL program %u\n", program_id);
3629     GL_EXTCALL(glUseProgramObjectARB(program_id));
3630     checkGLcall("glUseProgramObjectARB");
3631 }
3632
3633 static void shader_glsl_select_depth_blt(IWineD3DDevice *iface, enum tex_types tex_type) {
3634     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
3635     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
3636     struct shader_glsl_priv *priv = This->shader_priv;
3637     GLhandleARB *blt_program = &priv->depth_blt_program[tex_type];
3638
3639     if (!*blt_program) {
3640         GLhandleARB loc;
3641         *blt_program = create_glsl_blt_shader(gl_info, tex_type);
3642         loc = GL_EXTCALL(glGetUniformLocationARB(*blt_program, "sampler"));
3643         GL_EXTCALL(glUseProgramObjectARB(*blt_program));
3644         GL_EXTCALL(glUniform1iARB(loc, 0));
3645     } else {
3646         GL_EXTCALL(glUseProgramObjectARB(*blt_program));
3647     }
3648 }
3649
3650 static void shader_glsl_deselect_depth_blt(IWineD3DDevice *iface) {
3651     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
3652     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
3653     struct shader_glsl_priv *priv = This->shader_priv;
3654     GLhandleARB program_id;
3655
3656     program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
3657     if (program_id) TRACE("Using GLSL program %u\n", program_id);
3658
3659     GL_EXTCALL(glUseProgramObjectARB(program_id));
3660     checkGLcall("glUseProgramObjectARB");
3661 }
3662
3663 static void shader_glsl_destroy(IWineD3DBaseShader *iface) {
3664     const struct list *linked_programs;
3665     IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *) iface;
3666     IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)This->baseShader.device;
3667     struct shader_glsl_priv *priv = device->shader_priv;
3668     const WineD3D_GL_Info *gl_info = &device->adapter->gl_info;
3669     IWineD3DPixelShaderImpl *ps = NULL;
3670     IWineD3DVertexShaderImpl *vs = NULL;
3671
3672     /* Note: Do not use QueryInterface here to find out which shader type this is because this code
3673      * can be called from IWineD3DBaseShader::Release
3674      */
3675     char pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version);
3676
3677     if(pshader) {
3678         ps = (IWineD3DPixelShaderImpl *) This;
3679         if(ps->num_gl_shaders == 0) return;
3680     } else {
3681         vs = (IWineD3DVertexShaderImpl *) This;
3682         if(vs->num_gl_shaders == 0) return;
3683     }
3684
3685     linked_programs = &This->baseShader.linked_programs;
3686
3687     TRACE("Deleting linked programs\n");
3688     if (linked_programs->next) {
3689         struct glsl_shader_prog_link *entry, *entry2;
3690
3691         if(pshader) {
3692             LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, pshader_entry) {
3693                 delete_glsl_program_entry(priv, gl_info, entry);
3694             }
3695         } else {
3696             LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
3697                 delete_glsl_program_entry(priv, gl_info, entry);
3698             }
3699         }
3700     }
3701
3702     if(pshader) {
3703         UINT i;
3704
3705         ENTER_GL();
3706         for(i = 0; i < ps->num_gl_shaders; i++) {
3707             TRACE("deleting pshader %u\n", ps->gl_shaders[i].prgId);
3708             GL_EXTCALL(glDeleteObjectARB(ps->gl_shaders[i].prgId));
3709             checkGLcall("glDeleteObjectARB");
3710         }
3711         LEAVE_GL();
3712         HeapFree(GetProcessHeap(), 0, ps->gl_shaders);
3713         ps->gl_shaders = NULL;
3714         ps->num_gl_shaders = 0;
3715         ps->shader_array_size = 0;
3716     } else {
3717         UINT i;
3718
3719         ENTER_GL();
3720         for(i = 0; i < vs->num_gl_shaders; i++) {
3721             TRACE("deleting vshader %u\n", vs->gl_shaders[i].prgId);
3722             GL_EXTCALL(glDeleteObjectARB(vs->gl_shaders[i].prgId));
3723             checkGLcall("glDeleteObjectARB");
3724         }
3725         LEAVE_GL();
3726         HeapFree(GetProcessHeap(), 0, vs->gl_shaders);
3727         vs->gl_shaders = NULL;
3728         vs->num_gl_shaders = 0;
3729         vs->shader_array_size = 0;
3730     }
3731 }
3732
3733 static unsigned int glsl_program_key_hash(const void *key)
3734 {
3735     const glsl_program_key_t *k = key;
3736
3737     unsigned int hash = ((DWORD_PTR) k->vshader) | ((DWORD_PTR) k->pshader) << 16;
3738     hash += ~(hash << 15);
3739     hash ^=  (hash >> 10);
3740     hash +=  (hash << 3);
3741     hash ^=  (hash >> 6);
3742     hash += ~(hash << 11);
3743     hash ^=  (hash >> 16);
3744
3745     return hash;
3746 }
3747
3748 static BOOL glsl_program_key_compare(const void *keya, const void *keyb)
3749 {
3750     const glsl_program_key_t *ka = keya;
3751     const glsl_program_key_t *kb = keyb;
3752
3753     return ka->vshader == kb->vshader && ka->pshader == kb->pshader &&
3754            (memcmp(&ka->ps_args, &kb->ps_args, sizeof(kb->ps_args)) == 0) &&
3755            (memcmp(&ka->vs_args, &kb->vs_args, sizeof(kb->vs_args)) == 0);
3756 }
3757
3758 static BOOL constant_heap_init(struct constant_heap *heap, unsigned int constant_count)
3759 {
3760     SIZE_T size = (constant_count + 1) * sizeof(*heap->entries) + constant_count * sizeof(*heap->positions);
3761     void *mem = HeapAlloc(GetProcessHeap(), 0, size);
3762
3763     if (!mem)
3764     {
3765         ERR("Failed to allocate memory\n");
3766         return FALSE;
3767     }
3768
3769     heap->entries = mem;
3770     heap->entries[1].version = 0;
3771     heap->positions = (unsigned int *)(heap->entries + constant_count + 1);
3772     heap->size = 1;
3773
3774     return TRUE;
3775 }
3776
3777 static void constant_heap_free(struct constant_heap *heap)
3778 {
3779     HeapFree(GetProcessHeap(), 0, heap->entries);
3780 }
3781
3782 static HRESULT shader_glsl_alloc(IWineD3DDevice *iface) {
3783     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
3784     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
3785     struct shader_glsl_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_glsl_priv));
3786     SIZE_T stack_size = wined3d_log2i(max(GL_LIMITS(vshader_constantsF), GL_LIMITS(pshader_constantsF))) + 1;
3787
3788     priv->stack = HeapAlloc(GetProcessHeap(), 0, stack_size * sizeof(*priv->stack));
3789     if (!priv->stack)
3790     {
3791         ERR("Failed to allocate memory.\n");
3792         HeapFree(GetProcessHeap(), 0, priv);
3793         return E_OUTOFMEMORY;
3794     }
3795
3796     if (!constant_heap_init(&priv->vconst_heap, GL_LIMITS(vshader_constantsF)))
3797     {
3798         ERR("Failed to initialize vertex shader constant heap\n");
3799         HeapFree(GetProcessHeap(), 0, priv->stack);
3800         HeapFree(GetProcessHeap(), 0, priv);
3801         return E_OUTOFMEMORY;
3802     }
3803
3804     if (!constant_heap_init(&priv->pconst_heap, GL_LIMITS(pshader_constantsF)))
3805     {
3806         ERR("Failed to initialize pixel shader constant heap\n");
3807         constant_heap_free(&priv->vconst_heap);
3808         HeapFree(GetProcessHeap(), 0, priv->stack);
3809         HeapFree(GetProcessHeap(), 0, priv);
3810         return E_OUTOFMEMORY;
3811     }
3812
3813     priv->glsl_program_lookup = hash_table_create(glsl_program_key_hash, glsl_program_key_compare);
3814     priv->next_constant_version = 1;
3815
3816     This->shader_priv = priv;
3817     return WINED3D_OK;
3818 }
3819
3820 static void shader_glsl_free(IWineD3DDevice *iface) {
3821     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
3822     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
3823     struct shader_glsl_priv *priv = This->shader_priv;
3824     int i;
3825
3826     for (i = 0; i < tex_type_count; ++i)
3827     {
3828         if (priv->depth_blt_program[i])
3829         {
3830             GL_EXTCALL(glDeleteObjectARB(priv->depth_blt_program[i]));
3831         }
3832     }
3833
3834     hash_table_destroy(priv->glsl_program_lookup, NULL, NULL);
3835     constant_heap_free(&priv->pconst_heap);
3836     constant_heap_free(&priv->vconst_heap);
3837
3838     HeapFree(GetProcessHeap(), 0, This->shader_priv);
3839     This->shader_priv = NULL;
3840 }
3841
3842 static BOOL shader_glsl_dirty_const(IWineD3DDevice *iface) {
3843     /* TODO: GL_EXT_bindable_uniform can be used to share constants across shaders */
3844     return FALSE;
3845 }
3846
3847 static GLuint shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer, const struct ps_compile_args *args) {
3848     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
3849     const struct shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
3850     CONST DWORD *function = This->baseShader.function;
3851     const char *fragcolor;
3852     const WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info;
3853
3854     /* Create the hw GLSL shader object and assign it as the shader->prgId */
3855     GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
3856
3857     shader_addline(buffer, "#version 120\n");
3858
3859     if (GL_SUPPORT(ARB_DRAW_BUFFERS)) {
3860         shader_addline(buffer, "#extension GL_ARB_draw_buffers : enable\n");
3861     }
3862     if (GL_SUPPORT(ARB_TEXTURE_RECTANGLE)) {
3863         /* The spec says that it doesn't have to be explicitly enabled, but the nvidia
3864          * drivers write a warning if we don't do so
3865          */
3866         shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n");
3867     }
3868
3869     /* Base Declarations */
3870     shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, args);
3871
3872     /* Pack 3.0 inputs */
3873     if (reg_maps->shader_version >= WINED3DPS_VERSION(3,0) && args->vp_mode != vertexshader) {
3874         pshader_glsl_input_pack(buffer, This->semantics_in, iface, args->vp_mode);
3875     }
3876
3877     /* Base Shader Body */
3878     shader_generate_main( (IWineD3DBaseShader*) This, buffer, reg_maps, function);
3879
3880     /* Pixel shaders < 2.0 place the resulting color in R0 implicitly */
3881     if (reg_maps->shader_version < WINED3DPS_VERSION(2,0))
3882     {
3883         /* Some older cards like GeforceFX ones don't support multiple buffers, so also not gl_FragData */
3884         if(GL_SUPPORT(ARB_DRAW_BUFFERS))
3885             shader_addline(buffer, "gl_FragData[0] = R0;\n");
3886         else
3887             shader_addline(buffer, "gl_FragColor = R0;\n");
3888     }
3889
3890     if(GL_SUPPORT(ARB_DRAW_BUFFERS)) {
3891         fragcolor = "gl_FragData[0]";
3892     } else {
3893         fragcolor = "gl_FragColor";
3894     }
3895     if(args->srgb_correction) {
3896         shader_addline(buffer, "tmp0.xyz = pow(%s.xyz, vec3(%f, %f, %f)) * vec3(%f, %f, %f) - vec3(%f, %f, %f);\n",
3897                         fragcolor, srgb_pow, srgb_pow, srgb_pow, srgb_mul_high, srgb_mul_high, srgb_mul_high,
3898                         srgb_sub_high, srgb_sub_high, srgb_sub_high);
3899         shader_addline(buffer, "tmp1.xyz = %s.xyz * srgb_mul_low.xyz;\n", fragcolor);
3900         shader_addline(buffer, "%s.x = %s.x < srgb_comparison.x ? tmp1.x : tmp0.x;\n", fragcolor, fragcolor);
3901         shader_addline(buffer, "%s.y = %s.y < srgb_comparison.y ? tmp1.y : tmp0.y;\n", fragcolor, fragcolor);
3902         shader_addline(buffer, "%s.z = %s.z < srgb_comparison.z ? tmp1.z : tmp0.z;\n", fragcolor, fragcolor);
3903         shader_addline(buffer, "%s = clamp(%s, 0.0, 1.0);\n", fragcolor, fragcolor);
3904     }
3905     /* Pixel shader < 3.0 do not replace the fog stage.
3906      * This implements linear fog computation and blending.
3907      * TODO: non linear fog
3908      * NOTE: gl_Fog.start and gl_Fog.end don't hold fog start s and end e but
3909      * -1/(e-s) and e/(e-s) respectively.
3910      */
3911     if(reg_maps->shader_version < WINED3DPS_VERSION(3,0)) {
3912         switch(args->fog) {
3913             case FOG_OFF: break;
3914             case FOG_LINEAR:
3915                 shader_addline(buffer, "float fogstart = -1.0 / (gl_Fog.end - gl_Fog.start);\n");
3916                 shader_addline(buffer, "float fogend = gl_Fog.end * -fogstart;\n");
3917                 shader_addline(buffer, "float Fog = clamp(gl_FogFragCoord * fogstart + fogend, 0.0, 1.0);\n");
3918                 shader_addline(buffer, "%s.xyz = mix(gl_Fog.color.xyz, %s.xyz, Fog);\n", fragcolor, fragcolor);
3919                 break;
3920             case FOG_EXP:
3921                 /* Fog = e^(-gl_Fog.density * gl_FogFragCoord) */
3922                 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_FogFragCoord);\n");
3923                 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n");
3924                 shader_addline(buffer, "%s.xyz = mix(gl_Fog.color.xyz, %s.xyz, Fog);\n", fragcolor, fragcolor);
3925                 break;
3926             case FOG_EXP2:
3927                 /* Fog = e^(-(gl_Fog.density * gl_FogFragCoord)^2) */
3928                 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_Fog.density * gl_FogFragCoord * gl_FogFragCoord);\n");
3929                 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n");
3930                 shader_addline(buffer, "%s.xyz = mix(gl_Fog.color.xyz, %s.xyz, Fog);\n", fragcolor, fragcolor);
3931                 break;
3932         }
3933     }
3934
3935     shader_addline(buffer, "}\n");
3936
3937     TRACE("Compiling shader object %u\n", shader_obj);
3938     GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer->buffer, NULL));
3939     GL_EXTCALL(glCompileShaderARB(shader_obj));
3940     print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
3941
3942     /* Store the shader object */
3943     return shader_obj;
3944 }
3945
3946 static GLuint shader_glsl_generate_vshader(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer, const struct vs_compile_args *args) {
3947     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
3948     const struct shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
3949     CONST DWORD *function = This->baseShader.function;
3950     const WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info;
3951
3952     /* Create the hw GLSL shader program and assign it as the shader->prgId */
3953     GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
3954
3955     shader_addline(buffer, "#version 120\n");
3956
3957     /* Base Declarations */
3958     shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, NULL);
3959
3960     /* Base Shader Body */
3961     shader_generate_main( (IWineD3DBaseShader*) This, buffer, reg_maps, function);
3962
3963     /* Unpack 3.0 outputs */
3964     if (reg_maps->shader_version >= WINED3DVS_VERSION(3,0)) shader_addline(buffer, "order_ps_input(OUT);\n");
3965     else shader_addline(buffer, "order_ps_input();\n");
3966
3967     /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
3968      * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
3969      * the fog frag coord is thrown away. If the fog frag coord is used, but not written by
3970      * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
3971      */
3972     if(args->fog_src == VS_FOG_Z) {
3973         shader_addline(buffer, "gl_FogFragCoord = gl_Position.z;\n");
3974     } else if (!reg_maps->fog) {
3975         shader_addline(buffer, "gl_FogFragCoord = 0.0;\n");
3976     }
3977
3978     /* Write the final position.
3979      *
3980      * OpenGL coordinates specify the center of the pixel while d3d coords specify
3981      * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
3982      * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
3983      * contains 1.0 to allow a mad.
3984      */
3985     shader_addline(buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n");
3986     shader_addline(buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n");
3987
3988     /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
3989      *
3990      * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However, shaders are run
3991      * before the homogeneous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
3992      * which is the same as z = z * 2 - w.
3993      */
3994     shader_addline(buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
3995
3996     shader_addline(buffer, "}\n");
3997
3998     TRACE("Compiling shader object %u\n", shader_obj);
3999     GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer->buffer, NULL));
4000     GL_EXTCALL(glCompileShaderARB(shader_obj));
4001     print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
4002
4003     return shader_obj;
4004 }
4005
4006 static void shader_glsl_get_caps(WINED3DDEVTYPE devtype, const WineD3D_GL_Info *gl_info, struct shader_caps *pCaps)
4007 {
4008     /* Nvidia Geforce6/7 or Ati R4xx/R5xx cards with GLSL support, support VS 3.0 but older Nvidia/Ati
4009      * models with GLSL support only support 2.0. In case of nvidia we can detect VS 2.0 support using
4010      * vs_nv_version which is based on NV_vertex_program.
4011      * For Ati cards there's no way using glsl (it abstracts the lowlevel info away) and also not
4012      * using ARB_vertex_program. It is safe to assume that when a card supports pixel shader 2.0 it
4013      * supports vertex shader 2.0 too and the way around. We can detect ps2.0 using the maximum number
4014      * of native instructions, so use that here. For more info see the pixel shader versioning code below.
4015      */
4016     if((GLINFO_LOCATION.vs_nv_version == VS_VERSION_20) || (GLINFO_LOCATION.ps_arb_max_instructions <= 512))
4017         pCaps->VertexShaderVersion = WINED3DVS_VERSION(2,0);
4018     else
4019         pCaps->VertexShaderVersion = WINED3DVS_VERSION(3,0);
4020     TRACE_(d3d_caps)("Hardware vertex shader version %d.%d enabled (GLSL)\n", (pCaps->VertexShaderVersion >> 8) & 0xff, pCaps->VertexShaderVersion & 0xff);
4021     pCaps->MaxVertexShaderConst = GL_LIMITS(vshader_constantsF);
4022
4023     /* Older DX9-class videocards (GeforceFX / Radeon >9500/X*00) only support pixel shader 2.0/2.0a/2.0b.
4024      * In OpenGL the extensions related to GLSL abstract lowlevel GL info away which is needed
4025      * to distinguish between 2.0 and 3.0 (and 2.0a/2.0b). In case of Nvidia we use their fragment
4026      * program extensions. On other hardware including ATI GL_ARB_fragment_program offers the info
4027      * in max native instructions. Intel and others also offer the info in this extension but they
4028      * don't support GLSL (at least on Windows).
4029      *
4030      * PS2.0 requires at least 96 instructions, 2.0a/2.0b go up to 512. Assume that if the number
4031      * of instructions is 512 or less we have to do with ps2.0 hardware.
4032      * NOTE: ps3.0 hardware requires 512 or more instructions but ati and nvidia offer 'enough' (1024 vs 4096) on their most basic ps3.0 hardware.
4033      */
4034     if((GLINFO_LOCATION.ps_nv_version == PS_VERSION_20) || (GLINFO_LOCATION.ps_arb_max_instructions <= 512))
4035         pCaps->PixelShaderVersion = WINED3DPS_VERSION(2,0);
4036     else
4037         pCaps->PixelShaderVersion = WINED3DPS_VERSION(3,0);
4038
4039     /* FIXME: The following line is card dependent. -8.0 to 8.0 is the
4040      * Direct3D minimum requirement.
4041      *
4042      * Both GL_ARB_fragment_program and GLSL require a "maximum representable magnitude"
4043      * of colors to be 2^10, and 2^32 for other floats. Should we use 1024 here?
4044      *
4045      * The problem is that the refrast clamps temporary results in the shader to
4046      * [-MaxValue;+MaxValue]. If the card's max value is bigger than the one we advertize here,
4047      * then applications may miss the clamping behavior. On the other hand, if it is smaller,
4048      * the shader will generate incorrect results too. Unfortunately, GL deliberately doesn't
4049      * offer a way to query this.
4050      */
4051     pCaps->PixelShader1xMaxValue = 8.0;
4052     TRACE_(d3d_caps)("Hardware pixel shader version %d.%d enabled (GLSL)\n", (pCaps->PixelShaderVersion >> 8) & 0xff, pCaps->PixelShaderVersion & 0xff);
4053 }
4054
4055 static BOOL shader_glsl_color_fixup_supported(struct color_fixup_desc fixup)
4056 {
4057     if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
4058     {
4059         TRACE("Checking support for fixup:\n");
4060         dump_color_fixup_desc(fixup);
4061     }
4062
4063     /* We support everything except YUV conversions. */
4064     if (!is_yuv_fixup(fixup))
4065     {
4066         TRACE("[OK]\n");
4067         return TRUE;
4068     }
4069
4070     TRACE("[FAILED]\n");
4071     return FALSE;
4072 }
4073
4074 static const SHADER_HANDLER shader_glsl_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
4075 {
4076     /* WINED3DSIH_ABS           */ shader_glsl_map2gl,
4077     /* WINED3DSIH_ADD           */ shader_glsl_arith,
4078     /* WINED3DSIH_BEM           */ pshader_glsl_bem,
4079     /* WINED3DSIH_BREAK         */ shader_glsl_break,
4080     /* WINED3DSIH_BREAKC        */ shader_glsl_breakc,
4081     /* WINED3DSIH_BREAKP        */ NULL,
4082     /* WINED3DSIH_CALL          */ shader_glsl_call,
4083     /* WINED3DSIH_CALLNZ        */ shader_glsl_callnz,
4084     /* WINED3DSIH_CMP           */ shader_glsl_cmp,
4085     /* WINED3DSIH_CND           */ shader_glsl_cnd,
4086     /* WINED3DSIH_CRS           */ shader_glsl_cross,
4087     /* WINED3DSIH_DCL           */ NULL,
4088     /* WINED3DSIH_DEF           */ NULL,
4089     /* WINED3DSIH_DEFB          */ NULL,
4090     /* WINED3DSIH_DEFI          */ NULL,
4091     /* WINED3DSIH_DP2ADD        */ pshader_glsl_dp2add,
4092     /* WINED3DSIH_DP3           */ shader_glsl_dot,
4093     /* WINED3DSIH_DP4           */ shader_glsl_dot,
4094     /* WINED3DSIH_DST           */ shader_glsl_dst,
4095     /* WINED3DSIH_DSX           */ shader_glsl_map2gl,
4096     /* WINED3DSIH_DSY           */ shader_glsl_map2gl,
4097     /* WINED3DSIH_ELSE          */ shader_glsl_else,
4098     /* WINED3DSIH_ENDIF         */ shader_glsl_end,
4099     /* WINED3DSIH_ENDLOOP       */ shader_glsl_end,
4100     /* WINED3DSIH_ENDREP        */ shader_glsl_end,
4101     /* WINED3DSIH_EXP           */ shader_glsl_map2gl,
4102     /* WINED3DSIH_EXPP          */ shader_glsl_expp,
4103     /* WINED3DSIH_FRC           */ shader_glsl_map2gl,
4104     /* WINED3DSIH_IF            */ shader_glsl_if,
4105     /* WINED3DSIH_IFC           */ shader_glsl_ifc,
4106     /* WINED3DSIH_LABEL         */ shader_glsl_label,
4107     /* WINED3DSIH_LIT           */ shader_glsl_lit,
4108     /* WINED3DSIH_LOG           */ shader_glsl_log,
4109     /* WINED3DSIH_LOGP          */ shader_glsl_log,
4110     /* WINED3DSIH_LOOP          */ shader_glsl_loop,
4111     /* WINED3DSIH_LRP           */ shader_glsl_lrp,
4112     /* WINED3DSIH_M3x2          */ shader_glsl_mnxn,
4113     /* WINED3DSIH_M3x3          */ shader_glsl_mnxn,
4114     /* WINED3DSIH_M3x4          */ shader_glsl_mnxn,
4115     /* WINED3DSIH_M4x3          */ shader_glsl_mnxn,
4116     /* WINED3DSIH_M4x4          */ shader_glsl_mnxn,
4117     /* WINED3DSIH_MAD           */ shader_glsl_mad,
4118     /* WINED3DSIH_MAX           */ shader_glsl_map2gl,
4119     /* WINED3DSIH_MIN           */ shader_glsl_map2gl,
4120     /* WINED3DSIH_MOV           */ shader_glsl_mov,
4121     /* WINED3DSIH_MOVA          */ shader_glsl_mov,
4122     /* WINED3DSIH_MUL           */ shader_glsl_arith,
4123     /* WINED3DSIH_NOP           */ NULL,
4124     /* WINED3DSIH_NRM           */ shader_glsl_map2gl,
4125     /* WINED3DSIH_PHASE         */ NULL,
4126     /* WINED3DSIH_POW           */ shader_glsl_pow,
4127     /* WINED3DSIH_RCP           */ shader_glsl_rcp,
4128     /* WINED3DSIH_REP           */ shader_glsl_rep,
4129     /* WINED3DSIH_RET           */ NULL,
4130     /* WINED3DSIH_RSQ           */ shader_glsl_rsq,
4131     /* WINED3DSIH_SETP          */ NULL,
4132     /* WINED3DSIH_SGE           */ shader_glsl_compare,
4133     /* WINED3DSIH_SGN           */ shader_glsl_map2gl,
4134     /* WINED3DSIH_SINCOS        */ shader_glsl_sincos,
4135     /* WINED3DSIH_SLT           */ shader_glsl_compare,
4136     /* WINED3DSIH_SUB           */ shader_glsl_arith,
4137     /* WINED3DSIH_TEX           */ pshader_glsl_tex,
4138     /* WINED3DSIH_TEXBEM        */ pshader_glsl_texbem,
4139     /* WINED3DSIH_TEXBEML       */ pshader_glsl_texbem,
4140     /* WINED3DSIH_TEXCOORD      */ pshader_glsl_texcoord,
4141     /* WINED3DSIH_TEXDEPTH      */ pshader_glsl_texdepth,
4142     /* WINED3DSIH_TEXDP3        */ pshader_glsl_texdp3,
4143     /* WINED3DSIH_TEXDP3TEX     */ pshader_glsl_texdp3tex,
4144     /* WINED3DSIH_TEXKILL       */ pshader_glsl_texkill,
4145     /* WINED3DSIH_TEXLDD        */ NULL,
4146     /* WINED3DSIH_TEXLDL        */ shader_glsl_texldl,
4147     /* WINED3DSIH_TEXM3x2DEPTH  */ pshader_glsl_texm3x2depth,
4148     /* WINED3DSIH_TEXM3x2PAD    */ pshader_glsl_texm3x2pad,
4149     /* WINED3DSIH_TEXM3x2TEX    */ pshader_glsl_texm3x2tex,
4150     /* WINED3DSIH_TEXM3x3       */ pshader_glsl_texm3x3,
4151     /* WINED3DSIH_TEXM3x3DIFF   */ NULL,
4152     /* WINED3DSIH_TEXM3x3PAD    */ pshader_glsl_texm3x3pad,
4153     /* WINED3DSIH_TEXM3x3SPEC   */ pshader_glsl_texm3x3spec,
4154     /* WINED3DSIH_TEXM3x3TEX    */ pshader_glsl_texm3x3tex,
4155     /* WINED3DSIH_TEXM3x3VSPEC  */ pshader_glsl_texm3x3vspec,
4156     /* WINED3DSIH_TEXREG2AR     */ pshader_glsl_texreg2ar,
4157     /* WINED3DSIH_TEXREG2GB     */ pshader_glsl_texreg2gb,
4158     /* WINED3DSIH_TEXREG2RGB    */ pshader_glsl_texreg2rgb,
4159 };
4160
4161 const shader_backend_t glsl_shader_backend = { /* GLSL shader backend descriptor: 15 positional initializers, so their order must match the member order of shader_backend_t (declared outside this chunk). */
4162     shader_glsl_instruction_handler_table, /* NOTE(review): presumably the WINED3DSIH_*-annotated handler table that ends just above - confirm */
4163     shader_glsl_select, /* NOTE(review): roles on this and the following entries are inferred from identifier names only; verify against shader_backend_t */
4164     shader_glsl_select_depth_blt, /* presumably enables the depth blit program - TODO confirm */
4165     shader_glsl_deselect_depth_blt, /* presumably the counterpart of the entry above - TODO confirm */
4166     shader_glsl_update_float_vertex_constants, /* presumably float constant update hook, vertex shader side - TODO confirm */
4167     shader_glsl_update_float_pixel_constants, /* presumably float constant update hook, pixel shader side - TODO confirm */
4168     shader_glsl_load_constants, /* presumably uploads shader constants - TODO confirm */
4169     shader_glsl_destroy, /* presumably per-shader cleanup - TODO confirm */
4170     shader_glsl_alloc, /* presumably allocates backend private data; paired with the free entry below - TODO confirm */
4171     shader_glsl_free, /* presumably releases what the alloc entry set up - TODO confirm */
4172     shader_glsl_dirty_const, /* presumably a dirty-constant query - TODO confirm */
4173     shader_glsl_generate_pshader, /* presumably generates GLSL for a pixel shader - TODO confirm */
4174     shader_glsl_generate_vshader, /* presumably generates GLSL for a vertex shader - TODO confirm */
4175     shader_glsl_get_caps, /* presumably reports backend capabilities - TODO confirm */
4176     shader_glsl_color_fixup_supported, /* last initializer; presumably a color fixup support query - TODO confirm */
4177 };