wintrust/tests: Remove superfluous pointer casts.
[wine] / dlls / wined3d / glsl_shader.c
1 /*
2  * GLSL pixel and vertex shader implementation
3  *
4  * Copyright 2006 Jason Green 
5  * Copyright 2006-2007 Henri Verbeet
6  * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21  */
22
/*
 * D3D shader asm has swizzles on source parameters, and write masks for
 * destination parameters. GLSL uses swizzles for both. The result of this is
 * that, for example, "mov dst.xw, src.zyxw" becomes "dst.xw = src.zw" in GLSL.
 * I.e., to generate a proper GLSL source swizzle, we need to take the D3D write
 * mask for the destination parameter into account.
 */
30
31 #include "config.h"
32 #include <limits.h>
33 #include <stdio.h>
34 #include "wined3d_private.h"
35
/* Default debug channel: the unqualified TRACE/FIXME/ERR calls in this file log to it. */
WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
/* Extra channels; d3d_constants is selected explicitly with TRACE_(d3d_constants)
 * by the integer and boolean constant loaders. */
WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
WINE_DECLARE_DEBUG_CHANNEL(d3d_caps);
WINE_DECLARE_DEBUG_CHANNEL(d3d);

/* NOTE(review): presumably consumed by GL_EXTCALL()/GL_LIMITS(); every function
 * here that uses those macros has a 'gl_info' pointer in scope - confirm against
 * wined3d_private.h. */
#define GLINFO_LOCATION      (*gl_info)
42
/* Textual form of a destination parameter.
 * NOTE(review): the users of this type are outside this chunk; going by the
 * field names, reg_name holds the register expression and mask_str the
 * write-mask suffix - confirm against the callers. */
typedef struct {
    char reg_name[150];
    char mask_str[6];
} glsl_dst_param_t;
47
/* Textual form of a source parameter.
 * NOTE(review): the users of this type are outside this chunk; going by the
 * field names, reg_name holds the register expression and param_str the
 * complete parameter string - confirm against the callers. */
typedef struct {
    char reg_name[150];
    char param_str[100];
} glsl_src_param_t;
52
/* Description of a sampling function.
 * NOTE(review): not used in the visible part of the file; presumably 'name' is
 * the GLSL function name and 'coord_mask' a write-mask style DWORD selecting
 * the coordinate components - confirm against the callers. */
typedef struct {
    const char *name;
    DWORD coord_mask;
} glsl_sample_function_t;
57
/* Per-level state stored on the explicit stack used by walk_constant_heap()
 * and walk_constant_heap_clamped(): it records what still has to be done for
 * the heap node at that depth. */
enum heap_node_op
{
    HEAP_NODE_TRAVERSE_LEFT,    /* Try the left child next. */
    HEAP_NODE_TRAVERSE_RIGHT,   /* Left side handled, try the right child next. */
    HEAP_NODE_POP,              /* Both children handled, go back to the parent. */
};
64
/* One node of a constant heap. */
struct constant_entry
{
    unsigned int idx;       /* Index of the float constant this node describes. */
    unsigned int version;   /* Version stamp written by update_heap_entry(). */
};
70
/* Binary heap of float constants keyed on their version.
 * update_heap_entry() moves an entry up while its version is greater than its
 * parent's, and the walkers start at index 1, so the root is entries[1] and
 * the children of node n are 2n and 2n+1. */
struct constant_heap
{
    struct constant_entry *entries;     /* Heap nodes; index 0 is not used by the code shown here. */
    unsigned int *positions;            /* positions[constant idx] = index of that constant in entries. */
    unsigned int size;                  /* Next free heap index; valid nodes are below it. */
};
77
/* GLSL shader private data, reached through the device's shader_priv pointer. */
struct shader_glsl_priv {
    /* Table whose values are struct glsl_shader_prog_link entries (see
     * reset_program_constant_version()). */
    struct hash_table_t *glsl_program_lookup;
    /* Program whose uniforms shader_glsl_load_constants() updates; NULL means
     * no GLSL program is set. */
    struct glsl_shader_prog_link *glsl_program;
    /* Version-ordered heaps of the vertex / pixel shader float constants. */
    struct constant_heap vconst_heap;
    struct constant_heap pconst_heap;
    /* Scratch stack handed to the constant heap walkers. */
    unsigned char *stack;
    /* NOTE(review): not referenced in this chunk; presumably one depth blit
     * program per texture type - confirm. */
    GLhandleARB depth_blt_program[tex_type_count];
    /* Version stamp given to constants when they are updated and to a program
     * after its constants have been uploaded. */
    UINT next_constant_version;
};
88
/* Struct to maintain data about a linked GLSL program */
struct glsl_shader_prog_link {
    /* NOTE(review): list links, presumably into per-shader lists of the
     * programs using that vertex / pixel shader - confirm. */
    struct list                 vshader_entry;
    struct list                 pshader_entry;
    /* GL program object handle. */
    GLhandleARB                 programId;
    /* Uniform locations of the float constants, indexed by constant index;
     * -1 marks a constant that is not present in the program. */
    GLhandleARB                 *vuniformF_locations;
    GLhandleARB                 *puniformF_locations;
    /* Uniform locations of the integer constants. */
    GLhandleARB                 vuniformI_locations[MAX_CONST_I];
    GLhandleARB                 puniformI_locations[MAX_CONST_I];
    /* Location of the position fixup uniform, uploaded for vertex shaders. */
    GLhandleARB                 posFixup_location;
    /* Locations of the bump environment matrix and luminance uniforms. */
    GLhandleARB                 bumpenvmat_location[MAX_TEXTURES];
    GLhandleARB                 luminancescale_location[MAX_TEXTURES];
    GLhandleARB                 luminanceoffset_location[MAX_TEXTURES];
    /* Location of the "ycorrection" uniform used for vpos correction. */
    GLhandleARB                 ycorrection_location;
    GLenum                      vertex_color_clamp;
    /* Shaders and pixel shader compile arguments this program was built from. */
    GLhandleARB                 vshader;
    IWineD3DPixelShader         *pshader;
    struct ps_compile_args      ps_args;
    /* Constant version last uploaded to this program; constants with a newer
     * version still have to be sent. */
    UINT                        constant_version;
};
109
/* Combination of vertex shader, pixel shader and pixel shader compile
 * arguments; the same three members appear in struct glsl_shader_prog_link.
 * NOTE(review): presumably the lookup key for glsl_program_lookup - confirm. */
typedef struct {
    GLhandleARB                 vshader;
    IWineD3DPixelShader         *pshader;
    struct ps_compile_args      ps_args;
} glsl_program_key_t;
115
116
117 /** Prints the GLSL info log which will contain error messages if they exist */
118 static void print_glsl_info_log(const WineD3D_GL_Info *gl_info, GLhandleARB obj)
119 {
120     int infologLength = 0;
121     char *infoLog;
122     unsigned int i;
123     BOOL is_spam;
124
125     static const char * const spam[] =
126     {
127         "Vertex shader was successfully compiled to run on hardware.\n",    /* fglrx          */
128         "Fragment shader was successfully compiled to run on hardware.\n",  /* fglrx          */
129         "Fragment shader(s) linked, vertex shader(s) linked. \n ",          /* fglrx, with \n */
130         "Fragment shader(s) linked, vertex shader(s) linked.",              /* fglrx, no \n   */
131         "Vertex shader(s) linked, no fragment shader(s) defined. \n ",      /* fglrx, with \n */
132         "Vertex shader(s) linked, no fragment shader(s) defined.",          /* fglrx, no \n   */
133         "Fragment shader was successfully compiled to run on hardware.\n"
134         "WARNING: 0:2: extension 'GL_ARB_draw_buffers' is not supported",
135         "Fragment shader(s) linked, no vertex shader(s) defined.",          /* fglrx, no \n   */
136         "Fragment shader(s) linked, no vertex shader(s) defined. \n ",      /* fglrx, with \n */
137         "WARNING: 0:2: extension 'GL_ARB_draw_buffers' is not supported\n"  /* MacOS ati      */
138     };
139
140     if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader)) return;
141
142     GL_EXTCALL(glGetObjectParameterivARB(obj,
143                GL_OBJECT_INFO_LOG_LENGTH_ARB,
144                &infologLength));
145
146     /* A size of 1 is just a null-terminated string, so the log should be bigger than
147      * that if there are errors. */
148     if (infologLength > 1)
149     {
150         /* Fglrx doesn't terminate the string properly, but it tells us the proper length.
151          * So use HEAP_ZERO_MEMORY to avoid uninitialized bytes
152          */
153         infoLog = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, infologLength);
154         GL_EXTCALL(glGetInfoLogARB(obj, infologLength, NULL, infoLog));
155         is_spam = FALSE;
156
157         for(i = 0; i < sizeof(spam) / sizeof(spam[0]); i++) {
158             if(strcmp(infoLog, spam[i]) == 0) {
159                 is_spam = TRUE;
160                 break;
161             }
162         }
163         if(is_spam) {
164             TRACE("Spam received from GLSL shader #%u: %s\n", obj, debugstr_a(infoLog));
165         } else {
166             FIXME("Error received from GLSL shader #%u: %s\n", obj, debugstr_a(infoLog));
167         }
168         HeapFree(GetProcessHeap(), 0, infoLog);
169     }
170 }
171
172 /**
173  * Loads (pixel shader) samplers
174  */
175 static void shader_glsl_load_psamplers(const WineD3D_GL_Info *gl_info, IWineD3DStateBlock *iface, GLhandleARB programId)
176 {
177     IWineD3DStateBlockImpl* stateBlock = (IWineD3DStateBlockImpl*) iface;
178     GLhandleARB name_loc;
179     int i;
180     char sampler_name[20];
181
182     for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) {
183         snprintf(sampler_name, sizeof(sampler_name), "Psampler%d", i);
184         name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
185         if (name_loc != -1) {
186             int mapped_unit = stateBlock->wineD3DDevice->texUnitMap[i];
187             if (mapped_unit != -1 && mapped_unit < GL_LIMITS(fragment_samplers)) {
188                 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
189                 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
190                 checkGLcall("glUniform1iARB");
191             } else {
192                 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
193             }
194         }
195     }
196 }
197
198 static void shader_glsl_load_vsamplers(const WineD3D_GL_Info *gl_info, IWineD3DStateBlock *iface, GLhandleARB programId)
199 {
200     IWineD3DStateBlockImpl* stateBlock = (IWineD3DStateBlockImpl*) iface;
201     GLhandleARB name_loc;
202     char sampler_name[20];
203     int i;
204
205     for (i = 0; i < MAX_VERTEX_SAMPLERS; ++i) {
206         snprintf(sampler_name, sizeof(sampler_name), "Vsampler%d", i);
207         name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
208         if (name_loc != -1) {
209             int mapped_unit = stateBlock->wineD3DDevice->texUnitMap[MAX_FRAGMENT_SAMPLERS + i];
210             if (mapped_unit != -1 && mapped_unit < GL_LIMITS(combined_samplers)) {
211                 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
212                 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
213                 checkGLcall("glUniform1iARB");
214             } else {
215                 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
216             }
217         }
218     }
219 }
220
/* Uploads every float constant whose heap version is newer than 'version'.
 *
 * The heap keeps the newest version at the root (see update_heap_entry()), so a
 * node that is not newer than 'version' is not descended into. The traversal is
 * an iterative depth-first walk using 'stack' to remember, per depth, what
 * remains to be done for the node at that depth.
 *
 * gl_info:            GL function table used by GL_EXTCALL.
 * constants:          float constant data, 4 floats per constant.
 * constant_locations: uniform location per constant index, -1 if absent.
 * heap:               heap to walk; the root is entries[1].
 * stack:              caller-provided scratch space, one byte per heap level.
 * version:            constants with a version <= this value are skipped. */
static inline void walk_constant_heap(const WineD3D_GL_Info *gl_info, const float *constants,
        const GLhandleARB *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
{
    int stack_idx = 0;
    unsigned int heap_idx = 1;
    unsigned int idx;

    /* Root is not newer than 'version': nothing to upload. */
    if (heap->entries[heap_idx].version <= version) return;

    idx = heap->entries[heap_idx].idx;
    if (constant_locations[idx] != -1) GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
    stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;

    /* stack_idx drops below 0 once the root itself has been popped. */
    while (stack_idx >= 0)
    {
        /* Note that we fall through to the next case statement. */
        switch(stack[stack_idx])
        {
            case HEAP_NODE_TRAVERSE_LEFT:
            {
                unsigned int left_idx = heap_idx << 1;
                if (left_idx < heap->size && heap->entries[left_idx].version > version)
                {
                    /* Descend into the left child and upload it. */
                    heap_idx = left_idx;
                    idx = heap->entries[heap_idx].idx;
                    if (constant_locations[idx] != -1)
                        GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));

                    /* The parent continues with its right child later; the child starts on its left. */
                    stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
                    stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
                    break;
                }
            }
            /* fall through: no left child to visit, try the right one */

            case HEAP_NODE_TRAVERSE_RIGHT:
            {
                unsigned int right_idx = (heap_idx << 1) + 1;
                if (right_idx < heap->size && heap->entries[right_idx].version > version)
                {
                    /* Descend into the right child and upload it. */
                    heap_idx = right_idx;
                    idx = heap->entries[heap_idx].idx;
                    if (constant_locations[idx] != -1)
                        GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));

                    /* The parent is finished once this subtree is done. */
                    stack[stack_idx++] = HEAP_NODE_POP;
                    stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
                    break;
                }
            }
            /* fall through: no right child to visit either, return to the parent */

            case HEAP_NODE_POP:
            {
                heap_idx >>= 1;
                --stack_idx;
                break;
            }
        }
    }
    checkGLcall("walk_constant_heap()");
}
281
282 static inline void apply_clamped_constant(const WineD3D_GL_Info *gl_info, GLint location, const GLfloat *data)
283 {
284     GLfloat clamped_constant[4];
285
286     if (location == -1) return;
287
288     clamped_constant[0] = data[0] < -1.0f ? -1.0f : data[0] > 1.0 ? 1.0 : data[0];
289     clamped_constant[1] = data[1] < -1.0f ? -1.0f : data[1] > 1.0 ? 1.0 : data[1];
290     clamped_constant[2] = data[2] < -1.0f ? -1.0f : data[2] > 1.0 ? 1.0 : data[2];
291     clamped_constant[3] = data[3] < -1.0f ? -1.0f : data[3] > 1.0 ? 1.0 : data[3];
292
293     GL_EXTCALL(glUniform4fvARB(location, 1, clamped_constant));
294 }
295
/* Same traversal as walk_constant_heap(), but each constant is uploaded through
 * apply_clamped_constant(), which limits the components to [-1.0, 1.0] and
 * ignores locations of -1.
 *
 * The heap keeps the newest version at the root (see update_heap_entry()), so a
 * node that is not newer than 'version' is not descended into.
 *
 * gl_info:            GL function table used by GL_EXTCALL.
 * constants:          float constant data, 4 floats per constant.
 * constant_locations: uniform location per constant index, -1 if absent.
 * heap:               heap to walk; the root is entries[1].
 * stack:              caller-provided scratch space, one byte per heap level.
 * version:            constants with a version <= this value are skipped. */
static inline void walk_constant_heap_clamped(const WineD3D_GL_Info *gl_info, const float *constants,
        const GLhandleARB *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
{
    int stack_idx = 0;
    unsigned int heap_idx = 1;
    unsigned int idx;

    /* Root is not newer than 'version': nothing to upload. */
    if (heap->entries[heap_idx].version <= version) return;

    idx = heap->entries[heap_idx].idx;
    apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
    stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;

    /* stack_idx drops below 0 once the root itself has been popped. */
    while (stack_idx >= 0)
    {
        /* Note that we fall through to the next case statement. */
        switch(stack[stack_idx])
        {
            case HEAP_NODE_TRAVERSE_LEFT:
            {
                unsigned int left_idx = heap_idx << 1;
                if (left_idx < heap->size && heap->entries[left_idx].version > version)
                {
                    /* Descend into the left child and upload it. */
                    heap_idx = left_idx;
                    idx = heap->entries[heap_idx].idx;
                    apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);

                    /* The parent continues with its right child later; the child starts on its left. */
                    stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
                    stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
                    break;
                }
            }
            /* fall through: no left child to visit, try the right one */

            case HEAP_NODE_TRAVERSE_RIGHT:
            {
                unsigned int right_idx = (heap_idx << 1) + 1;
                if (right_idx < heap->size && heap->entries[right_idx].version > version)
                {
                    /* Descend into the right child and upload it. */
                    heap_idx = right_idx;
                    idx = heap->entries[heap_idx].idx;
                    apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);

                    /* The parent is finished once this subtree is done. */
                    stack[stack_idx++] = HEAP_NODE_POP;
                    stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
                    break;
                }
            }
            /* fall through: no right child to visit either, return to the parent */

            case HEAP_NODE_POP:
            {
                heap_idx >>= 1;
                --stack_idx;
                break;
            }
        }
    }
    checkGLcall("walk_constant_heap_clamped()");
}
354
355 /* Loads floating point constants (aka uniforms) into the currently set GLSL program. */
356 static void shader_glsl_load_constantsF(IWineD3DBaseShaderImpl *This, const WineD3D_GL_Info *gl_info,
357         const float *constants, const GLhandleARB *constant_locations, const struct constant_heap *heap,
358         unsigned char *stack, UINT version)
359 {
360     const local_constant *lconst;
361
362     /* 1.X pshaders have the constants clamped to [-1;1] implicitly. */
363     if (WINED3DSHADER_VERSION_MAJOR(This->baseShader.reg_maps.shader_version) == 1
364             && shader_is_pshader_version(This->baseShader.reg_maps.shader_version))
365         walk_constant_heap_clamped(gl_info, constants, constant_locations, heap, stack, version);
366     else
367         walk_constant_heap(gl_info, constants, constant_locations, heap, stack, version);
368
369     if (!This->baseShader.load_local_constsF)
370     {
371         TRACE("No need to load local float constants for this shader\n");
372         return;
373     }
374
375     /* Immediate constants are clamped to [-1;1] at shader creation time if needed */
376     LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry)
377     {
378         GLhandleARB location = constant_locations[lconst->idx];
379         /* We found this uniform name in the program - go ahead and send the data */
380         if (location != -1) GL_EXTCALL(glUniform4fvARB(location, 1, (const GLfloat *)lconst->value));
381     }
382     checkGLcall("glUniform4fvARB()");
383 }
384
385 /* Loads integer constants (aka uniforms) into the currently set GLSL program. */
386 static void shader_glsl_load_constantsI(IWineD3DBaseShaderImpl *This, const WineD3D_GL_Info *gl_info,
387         const GLhandleARB locations[MAX_CONST_I], const int *constants, WORD constants_set)
388 {
389     unsigned int i;
390     struct list* ptr;
391
392     for (i = 0; constants_set; constants_set >>= 1, ++i)
393     {
394         if (!(constants_set & 1)) continue;
395
396         TRACE_(d3d_constants)("Loading constants %u: %i, %i, %i, %i\n",
397                 i, constants[i*4], constants[i*4+1], constants[i*4+2], constants[i*4+3]);
398
399         /* We found this uniform name in the program - go ahead and send the data */
400         GL_EXTCALL(glUniform4ivARB(locations[i], 1, &constants[i*4]));
401         checkGLcall("glUniform4ivARB");
402     }
403
404     /* Load immediate constants */
405     ptr = list_head(&This->baseShader.constantsI);
406     while (ptr) {
407         const struct local_constant *lconst = LIST_ENTRY(ptr, const struct local_constant, entry);
408         unsigned int idx = lconst->idx;
409         const GLint *values = (const GLint *)lconst->value;
410
411         TRACE_(d3d_constants)("Loading local constants %i: %i, %i, %i, %i\n", idx,
412             values[0], values[1], values[2], values[3]);
413
414         /* We found this uniform name in the program - go ahead and send the data */
415         GL_EXTCALL(glUniform4ivARB(locations[idx], 1, values));
416         checkGLcall("glUniform4ivARB");
417         ptr = list_next(&This->baseShader.constantsI, ptr);
418     }
419 }
420
421 /* Loads boolean constants (aka uniforms) into the currently set GLSL program. */
422 static void shader_glsl_load_constantsB(IWineD3DBaseShaderImpl *This, const WineD3D_GL_Info *gl_info,
423         GLhandleARB programId, const BOOL *constants, WORD constants_set)
424 {
425     GLhandleARB tmp_loc;
426     unsigned int i;
427     char tmp_name[8];
428     char is_pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version);
429     const char* prefix = is_pshader? "PB":"VB";
430     struct list* ptr;
431
432     /* TODO: Benchmark and see if it would be beneficial to store the
433      * locations of the constants to avoid looking up each time */
434     for (i = 0; constants_set; constants_set >>= 1, ++i)
435     {
436         if (!(constants_set & 1)) continue;
437
438         TRACE_(d3d_constants)("Loading constants %i: %i;\n", i, constants[i]);
439
440         /* TODO: Benchmark and see if it would be beneficial to store the
441          * locations of the constants to avoid looking up each time */
442         snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, i);
443         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
444         if (tmp_loc != -1)
445         {
446             /* We found this uniform name in the program - go ahead and send the data */
447             GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, &constants[i]));
448             checkGLcall("glUniform1ivARB");
449         }
450     }
451
452     /* Load immediate constants */
453     ptr = list_head(&This->baseShader.constantsB);
454     while (ptr) {
455         const struct local_constant *lconst = LIST_ENTRY(ptr, const struct local_constant, entry);
456         unsigned int idx = lconst->idx;
457         const GLint *values = (const GLint *)lconst->value;
458
459         TRACE_(d3d_constants)("Loading local constants %i: %i\n", idx, values[0]);
460
461         snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, idx);
462         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
463         if (tmp_loc != -1) {
464             /* We found this uniform name in the program - go ahead and send the data */
465             GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, values));
466             checkGLcall("glUniform1ivARB");
467         }
468         ptr = list_next(&This->baseShader.constantsB, ptr);
469     }
470 }
471
472 static void reset_program_constant_version(void *value, void *context)
473 {
474     struct glsl_shader_prog_link *entry = (struct glsl_shader_prog_link *)value;
475     entry->constant_version = 0;
476 }
477
478 /**
479  * Loads the app-supplied constants into the currently set GLSL program.
480  */
481 static void shader_glsl_load_constants(
482     IWineD3DDevice* device,
483     char usePixelShader,
484     char useVertexShader) {
485    
486     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) device;
487     struct shader_glsl_priv *priv = (struct shader_glsl_priv *)deviceImpl->shader_priv;
488     IWineD3DStateBlockImpl* stateBlock = deviceImpl->stateBlock;
489     const WineD3D_GL_Info *gl_info = &deviceImpl->adapter->gl_info;
490
491     GLhandleARB programId;
492     struct glsl_shader_prog_link *prog = priv->glsl_program;
493     UINT constant_version;
494     int i;
495
496     if (!prog) {
497         /* No GLSL program set - nothing to do. */
498         return;
499     }
500     programId = prog->programId;
501     constant_version = prog->constant_version;
502
503     if (useVertexShader) {
504         IWineD3DBaseShaderImpl* vshader = (IWineD3DBaseShaderImpl*) stateBlock->vertexShader;
505
506         /* Load DirectX 9 float constants/uniforms for vertex shader */
507         shader_glsl_load_constantsF(vshader, gl_info, stateBlock->vertexShaderConstantF,
508                 prog->vuniformF_locations, &priv->vconst_heap, priv->stack, constant_version);
509
510         /* Load DirectX 9 integer constants/uniforms for vertex shader */
511         if(vshader->baseShader.uses_int_consts) {
512             shader_glsl_load_constantsI(vshader, gl_info, prog->vuniformI_locations,
513                     stateBlock->vertexShaderConstantI, stateBlock->changed.vertexShaderConstantsI);
514         }
515
516         /* Load DirectX 9 boolean constants/uniforms for vertex shader */
517         if(vshader->baseShader.uses_bool_consts) {
518             shader_glsl_load_constantsB(vshader, gl_info, programId,
519                     stateBlock->vertexShaderConstantB, stateBlock->changed.vertexShaderConstantsB);
520         }
521
522         /* Upload the position fixup params */
523         GL_EXTCALL(glUniform4fvARB(prog->posFixup_location, 1, &deviceImpl->posFixup[0]));
524         checkGLcall("glUniform4fvARB");
525     }
526
527     if (usePixelShader) {
528
529         IWineD3DBaseShaderImpl* pshader = (IWineD3DBaseShaderImpl*) stateBlock->pixelShader;
530
531         /* Load DirectX 9 float constants/uniforms for pixel shader */
532         shader_glsl_load_constantsF(pshader, gl_info, stateBlock->pixelShaderConstantF,
533                 prog->puniformF_locations, &priv->pconst_heap, priv->stack, constant_version);
534
535         /* Load DirectX 9 integer constants/uniforms for pixel shader */
536         if(pshader->baseShader.uses_int_consts) {
537             shader_glsl_load_constantsI(pshader, gl_info, prog->puniformI_locations,
538                     stateBlock->pixelShaderConstantI, stateBlock->changed.pixelShaderConstantsI);
539         }
540
541         /* Load DirectX 9 boolean constants/uniforms for pixel shader */
542         if(pshader->baseShader.uses_bool_consts) {
543             shader_glsl_load_constantsB(pshader, gl_info, programId,
544                     stateBlock->pixelShaderConstantB, stateBlock->changed.pixelShaderConstantsB);
545         }
546
547         /* Upload the environment bump map matrix if needed. The needsbumpmat member specifies the texture stage to load the matrix from.
548          * It can't be 0 for a valid texbem instruction.
549          */
550         for(i = 0; i < ((IWineD3DPixelShaderImpl *) pshader)->numbumpenvmatconsts; i++) {
551             IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) pshader;
552             int stage = ps->luminanceconst[i].texunit;
553
554             const float *data = (const float *)&stateBlock->textureState[(int)ps->bumpenvmatconst[i].texunit][WINED3DTSS_BUMPENVMAT00];
555             GL_EXTCALL(glUniformMatrix2fvARB(prog->bumpenvmat_location[i], 1, 0, data));
556             checkGLcall("glUniformMatrix2fvARB");
557
558             /* texbeml needs the luminance scale and offset too. If texbeml is used, needsbumpmat
559              * is set too, so we can check that in the needsbumpmat check
560              */
561             if(ps->baseShader.reg_maps.luminanceparams[stage]) {
562                 const GLfloat *scale = (const GLfloat *)&stateBlock->textureState[stage][WINED3DTSS_BUMPENVLSCALE];
563                 const GLfloat *offset = (const GLfloat *)&stateBlock->textureState[stage][WINED3DTSS_BUMPENVLOFFSET];
564
565                 GL_EXTCALL(glUniform1fvARB(prog->luminancescale_location[i], 1, scale));
566                 checkGLcall("glUniform1fvARB");
567                 GL_EXTCALL(glUniform1fvARB(prog->luminanceoffset_location[i], 1, offset));
568                 checkGLcall("glUniform1fvARB");
569             }
570         }
571
572         if(((IWineD3DPixelShaderImpl *) pshader)->vpos_uniform) {
573             float correction_params[4];
574             if(deviceImpl->render_offscreen) {
575                 correction_params[0] = 0.0;
576                 correction_params[1] = 1.0;
577             } else {
578                 /* position is window relative, not viewport relative */
579                 correction_params[0] = ((IWineD3DSurfaceImpl *) deviceImpl->render_targets[0])->currentDesc.Height;
580                 correction_params[1] = -1.0;
581             }
582             GL_EXTCALL(glUniform4fvARB(prog->ycorrection_location, 1, correction_params));
583         }
584     }
585
586     if (priv->next_constant_version == UINT_MAX)
587     {
588         TRACE("Max constant version reached, resetting to 0.\n");
589         hash_table_for_each_entry(priv->glsl_program_lookup, reset_program_constant_version, NULL);
590         priv->next_constant_version = 1;
591     }
592     else
593     {
594         prog->constant_version = priv->next_constant_version++;
595     }
596 }
597
598 static inline void update_heap_entry(struct constant_heap *heap, unsigned int idx,
599         unsigned int heap_idx, DWORD new_version)
600 {
601     struct constant_entry *entries = heap->entries;
602     unsigned int *positions = heap->positions;
603     unsigned int parent_idx;
604
605     while (heap_idx > 1)
606     {
607         parent_idx = heap_idx >> 1;
608
609         if (new_version <= entries[parent_idx].version) break;
610
611         entries[heap_idx] = entries[parent_idx];
612         positions[entries[parent_idx].idx] = heap_idx;
613         heap_idx = parent_idx;
614     }
615
616     entries[heap_idx].version = new_version;
617     entries[heap_idx].idx = idx;
618     positions[idx] = heap_idx;
619 }
620
621 static void shader_glsl_update_float_vertex_constants(IWineD3DDevice *iface, UINT start, UINT count)
622 {
623     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
624     struct shader_glsl_priv *priv = (struct shader_glsl_priv *)This->shader_priv;
625     struct constant_heap *heap = &priv->vconst_heap;
626     UINT i;
627
628     for (i = start; i < count + start; ++i)
629     {
630         if (!This->stateBlock->changed.vertexShaderConstantsF[i])
631             update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
632         else
633             update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
634     }
635 }
636
637 static void shader_glsl_update_float_pixel_constants(IWineD3DDevice *iface, UINT start, UINT count)
638 {
639     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
640     struct shader_glsl_priv *priv = (struct shader_glsl_priv *)This->shader_priv;
641     struct constant_heap *heap = &priv->pconst_heap;
642     UINT i;
643
644     for (i = start; i < count + start; ++i)
645     {
646         if (!This->stateBlock->changed.pixelShaderConstantsF[i])
647             update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
648         else
649             update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
650     }
651 }
652
653 /** Generate the variable & register declarations for the GLSL output target */
654 static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const shader_reg_maps *reg_maps,
655         SHADER_BUFFER *buffer, const WineD3D_GL_Info *gl_info,
656         const struct ps_compile_args *ps_args)
657 {
658     IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
659     IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) This->baseShader.device;
660     DWORD shader_version = reg_maps->shader_version;
661     unsigned int i, extra_constants_needed = 0;
662     const local_constant *lconst;
663
664     /* There are some minor differences between pixel and vertex shaders */
665     char pshader = shader_is_pshader_version(shader_version);
666     char prefix = pshader ? 'P' : 'V';
667
668     /* Prototype the subroutines */
669     for (i = 0; i < This->baseShader.limits.label; i++) {
670         if (reg_maps->labels[i])
671             shader_addline(buffer, "void subroutine%u();\n", i);
672     }
673
674     /* Declare the constants (aka uniforms) */
675     if (This->baseShader.limits.constant_float > 0) {
676         unsigned max_constantsF = min(This->baseShader.limits.constant_float, 
677                 (pshader ? GL_LIMITS(pshader_constantsF) : GL_LIMITS(vshader_constantsF)));
678         shader_addline(buffer, "uniform vec4 %cC[%u];\n", prefix, max_constantsF);
679     }
680
681     if (This->baseShader.limits.constant_int > 0)
682         shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, This->baseShader.limits.constant_int);
683
684     if (This->baseShader.limits.constant_bool > 0)
685         shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, This->baseShader.limits.constant_bool);
686
687     if(!pshader) {
688         shader_addline(buffer, "uniform vec4 posFixup;\n");
689         /* Predeclaration; This function is added at link time based on the pixel shader.
690          * VS 3.0 shaders have an array OUT[] the shader writes to, earlier versions don't have
691          * that. We know the input to the reorder function at vertex shader compile time, so
692          * we can deal with that. The reorder function for a 1.x and 2.x vertex shader can just
693          * read gl_FrontColor. The output depends on the pixel shader. The reorder function for a
694          * 1.x and 2.x pshader or for fixed function will write gl_FrontColor, and for a 3.0 shader
695          * it will write to the varying array. Here we depend on the shader optimizer on sorting that
696          * out. The nvidia driver only does that if the parameter is inout instead of out, hence the
697          * inout.
698          */
699         if (shader_version >= WINED3DVS_VERSION(3, 0))
700         {
701             shader_addline(buffer, "void order_ps_input(in vec4[%u]);\n", MAX_REG_OUTPUT);
702         } else {
703             shader_addline(buffer, "void order_ps_input();\n");
704         }
705     } else {
706         IWineD3DPixelShaderImpl *ps_impl = (IWineD3DPixelShaderImpl *) This;
707
708         ps_impl->numbumpenvmatconsts = 0;
709         for(i = 0; i < (sizeof(reg_maps->bumpmat) / sizeof(reg_maps->bumpmat[0])); i++) {
710             if(!reg_maps->bumpmat[i]) {
711                 continue;
712             }
713
714             ps_impl->bumpenvmatconst[(int) ps_impl->numbumpenvmatconsts].texunit = i;
715             shader_addline(buffer, "uniform mat2 bumpenvmat%d;\n", i);
716
717             if(reg_maps->luminanceparams) {
718                 ps_impl->luminanceconst[(int) ps_impl->numbumpenvmatconsts].texunit = i;
719                 shader_addline(buffer, "uniform float luminancescale%d;\n", i);
720                 shader_addline(buffer, "uniform float luminanceoffset%d;\n", i);
721                 extra_constants_needed++;
722             } else {
723                 ps_impl->luminanceconst[(int) ps_impl->numbumpenvmatconsts].texunit = -1;
724             }
725
726             extra_constants_needed++;
727             ps_impl->numbumpenvmatconsts++;
728         }
729
730         if(ps_args->srgb_correction) {
731             shader_addline(buffer, "const vec4 srgb_mul_low = vec4(%f, %f, %f, %f);\n",
732                             srgb_mul_low, srgb_mul_low, srgb_mul_low, srgb_mul_low);
733             shader_addline(buffer, "const vec4 srgb_comparison = vec4(%f, %f, %f, %f);\n",
734                             srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp);
735         }
736         if(reg_maps->vpos || reg_maps->usesdsy) {
737             if(This->baseShader.limits.constant_float + extra_constants_needed + 1 < GL_LIMITS(pshader_constantsF)) {
738                 shader_addline(buffer, "uniform vec4 ycorrection;\n");
739                 ((IWineD3DPixelShaderImpl *) This)->vpos_uniform = 1;
740                 extra_constants_needed++;
741             } else {
742                 /* This happens because we do not have proper tracking of the constant registers that are
743                  * actually used, only the max limit of the shader version
744                  */
745                 FIXME("Cannot find a free uniform for vpos correction params\n");
746                 shader_addline(buffer, "const vec4 ycorrection = vec4(%f, %f, 0.0, 0.0);\n",
747                                device->render_offscreen ? 0.0 : ((IWineD3DSurfaceImpl *) device->render_targets[0])->currentDesc.Height,
748                                device->render_offscreen ? 1.0 : -1.0);
749             }
750             shader_addline(buffer, "vec4 vpos;\n");
751         }
752     }
753
754     /* Declare texture samplers */ 
755     for (i = 0; i < This->baseShader.limits.sampler; i++) {
756         if (reg_maps->samplers[i]) {
757
758             DWORD stype = reg_maps->samplers[i] & WINED3DSP_TEXTURETYPE_MASK;
759             switch (stype) {
760
761                 case WINED3DSTT_1D:
762                     shader_addline(buffer, "uniform sampler1D %csampler%u;\n", prefix, i);
763                     break;
764                 case WINED3DSTT_2D:
765                     if(device->stateBlock->textures[i] &&
766                        IWineD3DBaseTexture_GetTextureDimensions(device->stateBlock->textures[i]) == GL_TEXTURE_RECTANGLE_ARB) {
767                         shader_addline(buffer, "uniform sampler2DRect %csampler%u;\n", prefix, i);
768                     } else {
769                         shader_addline(buffer, "uniform sampler2D %csampler%u;\n", prefix, i);
770                     }
771                     break;
772                 case WINED3DSTT_CUBE:
773                     shader_addline(buffer, "uniform samplerCube %csampler%u;\n", prefix, i);
774                     break;
775                 case WINED3DSTT_VOLUME:
776                     shader_addline(buffer, "uniform sampler3D %csampler%u;\n", prefix, i);
777                     break;
778                 default:
779                     shader_addline(buffer, "uniform unsupported_sampler %csampler%u;\n", prefix, i);
780                     FIXME("Unrecognized sampler type: %#x\n", stype);
781                     break;
782             }
783         }
784     }
785     
786     /* Declare address variables */
787     for (i = 0; i < This->baseShader.limits.address; i++) {
788         if (reg_maps->address[i])
789             shader_addline(buffer, "ivec4 A%d;\n", i);
790     }
791
792     /* Declare texture coordinate temporaries and initialize them */
793     for (i = 0; i < This->baseShader.limits.texcoord; i++) {
794         if (reg_maps->texcoord[i]) 
795             shader_addline(buffer, "vec4 T%u = gl_TexCoord[%u];\n", i, i);
796     }
797
798     /* Declare input register varyings. Only pixel shader, vertex shaders have that declared in the
799      * helper function shader that is linked in at link time
800      */
801     if (pshader && shader_version >= WINED3DPS_VERSION(3, 0))
802     {
803         if (use_vs(device->stateBlock))
804         {
805             shader_addline(buffer, "varying vec4 IN[%u];\n", GL_LIMITS(glsl_varyings) / 4);
806         } else {
807             /* TODO: Write a replacement shader for the fixed function vertex pipeline, so this isn't needed.
808              * For fixed function vertex processing + 3.0 pixel shader we need a separate function in the
809              * pixel shader that reads the fixed function color into the packed input registers.
810              */
811             shader_addline(buffer, "vec4 IN[%u];\n", GL_LIMITS(glsl_varyings) / 4);
812         }
813     }
814
815     /* Declare output register temporaries */
816     if(This->baseShader.limits.packed_output) {
817         shader_addline(buffer, "vec4 OUT[%u];\n", This->baseShader.limits.packed_output);
818     }
819
820     /* Declare temporary variables */
821     for(i = 0; i < This->baseShader.limits.temporary; i++) {
822         if (reg_maps->temporary[i])
823             shader_addline(buffer, "vec4 R%u;\n", i);
824     }
825
826     /* Declare attributes */
827     for (i = 0; i < This->baseShader.limits.attributes; i++) {
828         if (reg_maps->attributes[i])
829             shader_addline(buffer, "attribute vec4 attrib%i;\n", i);
830     }
831
832     /* Declare loop registers aLx */
833     for (i = 0; i < reg_maps->loop_depth; i++) {
834         shader_addline(buffer, "int aL%u;\n", i);
835         shader_addline(buffer, "int tmpInt%u;\n", i);
836     }
837
838     /* Temporary variables for matrix operations */
839     shader_addline(buffer, "vec4 tmp0;\n");
840     shader_addline(buffer, "vec4 tmp1;\n");
841
842     /* Local constants use a different name so they can be loaded once at shader link time
843      * They can't be hardcoded into the shader text via LC = {x, y, z, w}; because the
844      * float -> string conversion can cause precision loss.
845      */
846     if(!This->baseShader.load_local_constsF) {
847         LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
848             shader_addline(buffer, "uniform vec4 %cLC%u;\n", prefix, lconst->idx);
849         }
850     }
851
852     /* Start the main program */
853     shader_addline(buffer, "void main() {\n");
854     if(pshader && reg_maps->vpos) {
855         /* DirectX apps expect integer values, while OpenGL drivers add approximately 0.5. This causes
856          * off-by-one problems as spotted by the vPos d3d9 visual test. Unfortunately the ATI cards do
857          * not add exactly 0.5, but rather something like 0.49999999 or 0.50000001, which still causes
         * precision troubles when we just subtract 0.5.
859          *
860          * To deal with that just floor() the position. This will eliminate the fraction on all cards.
861          *
862          * TODO: Test how that behaves with multisampling once we can enable multisampling in winex11.
863          *
864          * An advantage of floor is that it works even if the driver doesn't add 1/2. It is somewhat
865          * questionable if 1.5, 2.5, ... are the proper values to return in gl_FragCoord, even though
866          * coordinates specify the pixel centers instead of the pixel corners. This code will behave
         * correctly on drivers that return integer values.
868          */
869         shader_addline(buffer, "vpos = floor(vec4(0, ycorrection[0], 0, 0) + gl_FragCoord * vec4(1, ycorrection[1], 1, 1));\n");
870     }
871 }
872
873 /*****************************************************************************
874  * Functions to generate GLSL strings from DirectX Shader bytecode begin here.
875  *
876  * For more information, see http://wiki.winehq.org/DirectX-Shaders
877  ****************************************************************************/
878
/* Prototypes */

/* Forward declaration: shader_glsl_get_register_name() calls this function to
 * resolve relative address tokens before its definition appears further down
 * in this file. */
static void shader_glsl_add_src_param(const SHADER_OPCODE_ARG *arg, const DWORD param,
        const DWORD addr_token, DWORD mask, glsl_src_param_t *src_param);
882
/** Used for opcode modifiers - They multiply the result by the specified amount.
 *
 * The table is indexed by the destination shift field of the destination
 * token (see shader_glsl_append_dst_ext()), and the selected string is emitted
 * directly in front of the opening parenthesis of the right-hand side.
 * Entries that are empty strings emit no multiplier at all. */
static const char * const shift_glsl_tab[] = {
    "",           /*  0 (none) */
    "2.0 * ",     /*  1 (x2)   */
    "4.0 * ",     /*  2 (x4)   */
    "8.0 * ",     /*  3 (x8)   */
    "16.0 * ",    /*  4 (x16)  */
    "32.0 * ",    /*  5 (x32)  */
    "",           /*  6 (x64)  */
    "",           /*  7 (x128) */
    "",           /*  8 (d256) */
    "",           /*  9 (d128) */
    "",           /* 10 (d64)  */
    "",           /* 11 (d32)  */
    "0.0625 * ",  /* 12 (d16)  */
    "0.125 * ",   /* 13 (d8)   */
    "0.25 * ",    /* 14 (d4)   */
    "0.5 * "      /* 15 (d2)   */
};
902
903 /* Generate a GLSL parameter that does the input modifier computation and return the input register/mask to use */
904 static void shader_glsl_gen_modifier (
905     const DWORD instr,
906     const char *in_reg,
907     const char *in_regswizzle,
908     char *out_str) {
909
910     out_str[0] = 0;
911     
912     if (instr == WINED3DSIO_TEXKILL)
913         return;
914
915     switch (instr & WINED3DSP_SRCMOD_MASK) {
916     case WINED3DSPSM_DZ: /* Need to handle this in the instructions itself (texld & texcrd). */
917     case WINED3DSPSM_DW:
918     case WINED3DSPSM_NONE:
919         sprintf(out_str, "%s%s", in_reg, in_regswizzle);
920         break;
921     case WINED3DSPSM_NEG:
922         sprintf(out_str, "-%s%s", in_reg, in_regswizzle);
923         break;
924     case WINED3DSPSM_NOT:
925         sprintf(out_str, "!%s%s", in_reg, in_regswizzle);
926         break;
927     case WINED3DSPSM_BIAS:
928         sprintf(out_str, "(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
929         break;
930     case WINED3DSPSM_BIASNEG:
931         sprintf(out_str, "-(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
932         break;
933     case WINED3DSPSM_SIGN:
934         sprintf(out_str, "(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
935         break;
936     case WINED3DSPSM_SIGNNEG:
937         sprintf(out_str, "-(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
938         break;
939     case WINED3DSPSM_COMP:
940         sprintf(out_str, "(1.0 - %s%s)", in_reg, in_regswizzle);
941         break;
942     case WINED3DSPSM_X2:
943         sprintf(out_str, "(2.0 * %s%s)", in_reg, in_regswizzle);
944         break;
945     case WINED3DSPSM_X2NEG:
946         sprintf(out_str, "-(2.0 * %s%s)", in_reg, in_regswizzle);
947         break;
948     case WINED3DSPSM_ABS:
949         sprintf(out_str, "abs(%s%s)", in_reg, in_regswizzle);
950         break;
951     case WINED3DSPSM_ABSNEG:
952         sprintf(out_str, "-abs(%s%s)", in_reg, in_regswizzle);
953         break;
954     default:
955         FIXME("Unhandled modifier %u\n", (instr & WINED3DSP_SRCMOD_MASK));
956         sprintf(out_str, "%s%s", in_reg, in_regswizzle);
957     }
958 }
959
960 /** Writes the GLSL variable name that corresponds to the register that the
961  * DX opcode parameter is trying to access */
962 static void shader_glsl_get_register_name(const DWORD param, const DWORD addr_token,
963         char *regstr, BOOL *is_color, const SHADER_OPCODE_ARG *arg)
964 {
965     /* oPos, oFog and oPts in D3D */
966     static const char * const hwrastout_reg_names[] = { "gl_Position", "gl_FogFragCoord", "gl_PointSize" };
967
968     DWORD reg = param & WINED3DSP_REGNUM_MASK;
969     DWORD regtype = shader_get_regtype(param);
970     IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) arg->shader;
971     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
972     const WineD3D_GL_Info* gl_info = &deviceImpl->adapter->gl_info;
973     DWORD shader_version = This->baseShader.reg_maps.shader_version;
974     char pshader = shader_is_pshader_version(shader_version);
975     char tmpStr[150];
976
977     *is_color = FALSE;   
978  
979     switch (regtype) {
980     case WINED3DSPR_TEMP:
981         sprintf(tmpStr, "R%u", reg);
982     break;
983     case WINED3DSPR_INPUT:
984         if (pshader) {
985             /* Pixel shaders >= 3.0 */
986             if (WINED3DSHADER_VERSION_MAJOR(shader_version) >= 3)
987             {
988                 DWORD in_count = GL_LIMITS(glsl_varyings) / 4;
989
990                 if (param & WINED3DSHADER_ADDRMODE_RELATIVE) {
991                     glsl_src_param_t rel_param;
992                     shader_glsl_add_src_param(arg, addr_token, 0, WINED3DSP_WRITEMASK_0, &rel_param);
993
994                     /* Removing a + 0 would be an obvious optimization, but macos doesn't see the NOP
995                      * operation there
996                      */
997                     if(((IWineD3DPixelShaderImpl *) This)->input_reg_map[reg]) {
998                         if (((IWineD3DPixelShaderImpl *)This)->declared_in_count > in_count) {
999                             sprintf(tmpStr, "((%s + %u) > %d ? (%s + %u) > %d ? gl_SecondaryColor : gl_Color : IN[%s + %u])",
1000                                     rel_param.param_str, ((IWineD3DPixelShaderImpl *)This)->input_reg_map[reg], in_count - 1,
1001                                     rel_param.param_str, ((IWineD3DPixelShaderImpl *)This)->input_reg_map[reg], in_count,
1002                                     rel_param.param_str, ((IWineD3DPixelShaderImpl *)This)->input_reg_map[reg]);
1003                         } else {
1004                             sprintf(tmpStr, "IN[%s + %u]", rel_param.param_str, ((IWineD3DPixelShaderImpl *)This)->input_reg_map[reg]);
1005                         }
1006                     } else {
1007                         if (((IWineD3DPixelShaderImpl *)This)->declared_in_count > in_count) {
1008                             sprintf(tmpStr, "((%s) > %d ? (%s) > %d ? gl_SecondaryColor : gl_Color : IN[%s])",
1009                                     rel_param.param_str, in_count - 1,
1010                                     rel_param.param_str, in_count,
1011                                     rel_param.param_str);
1012                         } else {
1013                             sprintf(tmpStr, "IN[%s]", rel_param.param_str);
1014                         }
1015                     }
1016                 } else {
1017                     DWORD idx = ((IWineD3DPixelShaderImpl *) This)->input_reg_map[reg];
1018                     if (idx == in_count) {
1019                         sprintf(tmpStr, "gl_Color");
1020                     } else if (idx == in_count + 1) {
1021                         sprintf(tmpStr, "gl_SecondaryColor");
1022                     } else {
1023                         sprintf(tmpStr, "IN[%u]", idx);
1024                     }
1025                 }
1026             } else {
1027                 if (reg==0)
1028                     strcpy(tmpStr, "gl_Color");
1029                 else
1030                     strcpy(tmpStr, "gl_SecondaryColor");
1031             }
1032         } else {
1033             if (((IWineD3DVertexShaderImpl *)This)->swizzle_map & (1 << reg)) *is_color = TRUE;
1034             sprintf(tmpStr, "attrib%u", reg);
1035         } 
1036         break;
1037     case WINED3DSPR_CONST:
1038     {
1039         const char prefix = pshader? 'P':'V';
1040
1041         /* Relative addressing */
1042         if (param & WINED3DSHADER_ADDRMODE_RELATIVE) {
1043
1044            /* Relative addressing on shaders 2.0+ have a relative address token, 
1045             * prior to that, it was hard-coded as "A0.x" because there's only 1 register */
1046            if (WINED3DSHADER_VERSION_MAJOR(shader_version) >= 2)
1047            {
1048                glsl_src_param_t rel_param;
1049                shader_glsl_add_src_param(arg, addr_token, 0, WINED3DSP_WRITEMASK_0, &rel_param);
1050                if(reg) {
1051                    sprintf(tmpStr, "%cC[%s + %u]", prefix, rel_param.param_str, reg);
1052                } else {
1053                    sprintf(tmpStr, "%cC[%s]", prefix, rel_param.param_str);
1054                }
1055            } else {
1056                if(reg) {
1057                    sprintf(tmpStr, "%cC[A0.x + %u]", prefix, reg);
1058                } else {
1059                    sprintf(tmpStr, "%cC[A0.x]", prefix);
1060                }
1061            }
1062
1063         } else {
1064             if(shader_constant_is_local(This, reg)) {
1065                 sprintf(tmpStr, "%cLC%u", prefix, reg);
1066             } else {
1067                 sprintf(tmpStr, "%cC[%u]", prefix, reg);
1068             }
1069         }
1070
1071         break;
1072     }
1073     case WINED3DSPR_CONSTINT:
1074         if (pshader)
1075             sprintf(tmpStr, "PI[%u]", reg);
1076         else
1077             sprintf(tmpStr, "VI[%u]", reg);
1078         break;
1079     case WINED3DSPR_CONSTBOOL:
1080         if (pshader)
1081             sprintf(tmpStr, "PB[%u]", reg);
1082         else
1083             sprintf(tmpStr, "VB[%u]", reg);
1084         break;
1085     case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
1086         if (pshader) {
1087             sprintf(tmpStr, "T%u", reg);
1088         } else {
1089             sprintf(tmpStr, "A%u", reg);
1090         }
1091     break;
1092     case WINED3DSPR_LOOP:
1093         sprintf(tmpStr, "aL%u", This->baseShader.cur_loop_regno - 1);
1094     break;
1095     case WINED3DSPR_SAMPLER:
1096         if (pshader)
1097             sprintf(tmpStr, "Psampler%u", reg);
1098         else
1099             sprintf(tmpStr, "Vsampler%u", reg);
1100     break;
1101     case WINED3DSPR_COLOROUT:
1102         if (reg >= GL_LIMITS(buffers)) {
1103             WARN("Write to render target %u, only %d supported\n", reg, 4);
1104         }
1105         if (GL_SUPPORT(ARB_DRAW_BUFFERS)) {
1106             sprintf(tmpStr, "gl_FragData[%u]", reg);
1107         } else { /* On older cards with GLSL support like the GeforceFX there's only one buffer. */
1108             sprintf(tmpStr, "gl_FragColor");
1109         }
1110     break;
1111     case WINED3DSPR_RASTOUT:
1112         sprintf(tmpStr, "%s", hwrastout_reg_names[reg]);
1113     break;
1114     case WINED3DSPR_DEPTHOUT:
1115         sprintf(tmpStr, "gl_FragDepth");
1116     break;
1117     case WINED3DSPR_ATTROUT:
1118         if (reg == 0) {
1119             sprintf(tmpStr, "gl_FrontColor");
1120         } else {
1121             sprintf(tmpStr, "gl_FrontSecondaryColor");
1122         }
1123     break;
1124     case WINED3DSPR_TEXCRDOUT:
1125         /* Vertex shaders >= 3.0: WINED3DSPR_OUTPUT */
1126         if (WINED3DSHADER_VERSION_MAJOR(shader_version) >= 3) sprintf(tmpStr, "OUT[%u]", reg);
1127         else sprintf(tmpStr, "gl_TexCoord[%u]", reg);
1128     break;
1129     case WINED3DSPR_MISCTYPE:
1130         if (reg == 0) {
1131             /* vPos */
1132             sprintf(tmpStr, "vpos");
1133         } else if (reg == 1){
1134             /* Note that gl_FrontFacing is a bool, while vFace is
1135              * a float for which the sign determines front/back
1136              */
1137             sprintf(tmpStr, "(gl_FrontFacing ? 1.0 : -1.0)");
1138         } else {
1139             FIXME("Unhandled misctype register %d\n", reg);
1140             sprintf(tmpStr, "unrecognized_register");
1141         }
1142         break;
1143     default:
1144         FIXME("Unhandled register name Type(%d)\n", regtype);
1145         sprintf(tmpStr, "unrecognized_register");
1146     break;
1147     }
1148
1149     strcat(regstr, tmpStr);
1150 }
1151
1152 /* Get the GLSL write mask for the destination register */
1153 static DWORD shader_glsl_get_write_mask(const DWORD param, char *write_mask) {
1154     char *ptr = write_mask;
1155     DWORD mask = param & WINED3DSP_WRITEMASK_ALL;
1156
1157     if (shader_is_scalar(param)) {
1158         mask = WINED3DSP_WRITEMASK_0;
1159     } else {
1160         *ptr++ = '.';
1161         if (param & WINED3DSP_WRITEMASK_0) *ptr++ = 'x';
1162         if (param & WINED3DSP_WRITEMASK_1) *ptr++ = 'y';
1163         if (param & WINED3DSP_WRITEMASK_2) *ptr++ = 'z';
1164         if (param & WINED3DSP_WRITEMASK_3) *ptr++ = 'w';
1165     }
1166
1167     *ptr = '\0';
1168
1169     return mask;
1170 }
1171
1172 static unsigned int shader_glsl_get_write_mask_size(DWORD write_mask) {
1173     unsigned int size = 0;
1174
1175     if (write_mask & WINED3DSP_WRITEMASK_0) ++size;
1176     if (write_mask & WINED3DSP_WRITEMASK_1) ++size;
1177     if (write_mask & WINED3DSP_WRITEMASK_2) ++size;
1178     if (write_mask & WINED3DSP_WRITEMASK_3) ++size;
1179
1180     return size;
1181 }
1182
1183 static void shader_glsl_get_swizzle(const DWORD param, BOOL fixup, DWORD mask, char *swizzle_str) {
1184     /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
1185      * but addressed as "rgba". To fix this we need to swap the register's x
1186      * and z components. */
1187     DWORD swizzle = (param & WINED3DSP_SWIZZLE_MASK) >> WINED3DSP_SWIZZLE_SHIFT;
1188     const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
1189     char *ptr = swizzle_str;
1190
1191     if (!shader_is_scalar(param)) {
1192         *ptr++ = '.';
1193         /* swizzle bits fields: wwzzyyxx */
1194         if (mask & WINED3DSP_WRITEMASK_0) *ptr++ = swizzle_chars[swizzle & 0x03];
1195         if (mask & WINED3DSP_WRITEMASK_1) *ptr++ = swizzle_chars[(swizzle >> 2) & 0x03];
1196         if (mask & WINED3DSP_WRITEMASK_2) *ptr++ = swizzle_chars[(swizzle >> 4) & 0x03];
1197         if (mask & WINED3DSP_WRITEMASK_3) *ptr++ = swizzle_chars[(swizzle >> 6) & 0x03];
1198     }
1199
1200     *ptr = '\0';
1201 }
1202
1203 /* From a given parameter token, generate the corresponding GLSL string.
1204  * Also, return the actual register name and swizzle in case the
1205  * caller needs this information as well. */
1206 static void shader_glsl_add_src_param(const SHADER_OPCODE_ARG *arg, const DWORD param,
1207         const DWORD addr_token, DWORD mask, glsl_src_param_t *src_param)
1208 {
1209     BOOL is_color = FALSE;
1210     char swizzle_str[6];
1211
1212     src_param->reg_name[0] = '\0';
1213     src_param->param_str[0] = '\0';
1214     swizzle_str[0] = '\0';
1215
1216     shader_glsl_get_register_name(param, addr_token, src_param->reg_name, &is_color, arg);
1217
1218     shader_glsl_get_swizzle(param, is_color, mask, swizzle_str);
1219     shader_glsl_gen_modifier(param, src_param->reg_name, swizzle_str, src_param->param_str);
1220 }
1221
1222 /* From a given parameter token, generate the corresponding GLSL string.
1223  * Also, return the actual register name and swizzle in case the
1224  * caller needs this information as well. */
1225 static DWORD shader_glsl_add_dst_param(const SHADER_OPCODE_ARG* arg, const DWORD param,
1226         const DWORD addr_token, glsl_dst_param_t *dst_param)
1227 {
1228     BOOL is_color = FALSE;
1229
1230     dst_param->mask_str[0] = '\0';
1231     dst_param->reg_name[0] = '\0';
1232
1233     shader_glsl_get_register_name(param, addr_token, dst_param->reg_name, &is_color, arg);
1234     return shader_glsl_get_write_mask(param, dst_param->mask_str);
1235 }
1236
1237 /* Append the destination part of the instruction to the buffer, return the effective write mask */
1238 static DWORD shader_glsl_append_dst_ext(SHADER_BUFFER *buffer, const SHADER_OPCODE_ARG *arg, const DWORD param)
1239 {
1240     glsl_dst_param_t dst_param;
1241     DWORD mask;
1242     int shift;
1243
1244     mask = shader_glsl_add_dst_param(arg, param, arg->dst_addr, &dst_param);
1245
1246     if(mask) {
1247         shift = (param & WINED3DSP_DSTSHIFT_MASK) >> WINED3DSP_DSTSHIFT_SHIFT;
1248         shader_addline(buffer, "%s%s = %s(", dst_param.reg_name, dst_param.mask_str, shift_glsl_tab[shift]);
1249     }
1250
1251     return mask;
1252 }
1253
1254 /* Append the destination part of the instruction to the buffer, return the effective write mask */
1255 static DWORD shader_glsl_append_dst(SHADER_BUFFER *buffer, const SHADER_OPCODE_ARG *arg)
1256 {
1257     return shader_glsl_append_dst_ext(buffer, arg, arg->dst);
1258 }
1259
1260 /** Process GLSL instruction modifiers */
1261 void shader_glsl_add_instruction_modifiers(const SHADER_OPCODE_ARG* arg)
1262 {
1263     DWORD mask = arg->dst & WINED3DSP_DSTMOD_MASK;
1264  
1265     if (arg->opcode->dst_token && mask != 0) {
1266         glsl_dst_param_t dst_param;
1267
1268         shader_glsl_add_dst_param(arg, arg->dst, 0, &dst_param);
1269
1270         if (mask & WINED3DSPDM_SATURATE) {
1271             /* _SAT means to clamp the value of the register to between 0 and 1 */
1272             shader_addline(arg->buffer, "%s%s = clamp(%s%s, 0.0, 1.0);\n", dst_param.reg_name,
1273                     dst_param.mask_str, dst_param.reg_name, dst_param.mask_str);
1274         }
1275         if (mask & WINED3DSPDM_MSAMPCENTROID) {
1276             FIXME("_centroid modifier not handled\n");
1277         }
1278         if (mask & WINED3DSPDM_PARTIALPRECISION) {
1279             /* MSDN says this modifier can be safely ignored, so that's what we'll do. */
1280         }
1281     }
1282 }
1283
1284 static inline const char* shader_get_comp_op(
1285     const DWORD opcode) {
1286
1287     DWORD op = (opcode & INST_CONTROLS_MASK) >> INST_CONTROLS_SHIFT;
1288     switch (op) {
1289         case COMPARISON_GT: return ">";
1290         case COMPARISON_EQ: return "==";
1291         case COMPARISON_GE: return ">=";
1292         case COMPARISON_LT: return "<";
1293         case COMPARISON_NE: return "!=";
1294         case COMPARISON_LE: return "<=";
1295         default:
1296             FIXME("Unrecognized comparison value: %u\n", op);
1297             return "(\?\?)";
1298     }
1299 }
1300
1301 static void shader_glsl_get_sample_function(DWORD sampler_type, BOOL projected, BOOL texrect, glsl_sample_function_t *sample_function) {
1302     /* Note that there's no such thing as a projected cube texture. */
1303     switch(sampler_type) {
1304         case WINED3DSTT_1D:
1305             sample_function->name = projected ? "texture1DProj" : "texture1D";
1306             sample_function->coord_mask = WINED3DSP_WRITEMASK_0;
1307             break;
1308         case WINED3DSTT_2D:
1309             if(texrect) {
1310                 sample_function->name = projected ? "texture2DRectProj" : "texture2DRect";
1311             } else {
1312                 sample_function->name = projected ? "texture2DProj" : "texture2D";
1313             }
1314             sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1;
1315             break;
1316         case WINED3DSTT_CUBE:
1317             sample_function->name = "textureCube";
1318             sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1319             break;
1320         case WINED3DSTT_VOLUME:
1321             sample_function->name = projected ? "texture3DProj" : "texture3D";
1322             sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1323             break;
1324         default:
1325             sample_function->name = "";
1326             sample_function->coord_mask = 0;
1327             FIXME("Unrecognized sampler type: %#x;\n", sampler_type);
1328             break;
1329     }
1330 }
1331
1332 static void shader_glsl_append_fixup_arg(char *arguments, const char *reg_name,
1333         BOOL sign_fixup, enum fixup_channel_source channel_source)
1334 {
1335     switch(channel_source)
1336     {
1337         case CHANNEL_SOURCE_ZERO:
1338             strcat(arguments, "0.0");
1339             break;
1340
1341         case CHANNEL_SOURCE_ONE:
1342             strcat(arguments, "1.0");
1343             break;
1344
1345         case CHANNEL_SOURCE_X:
1346             strcat(arguments, reg_name);
1347             strcat(arguments, ".x");
1348             break;
1349
1350         case CHANNEL_SOURCE_Y:
1351             strcat(arguments, reg_name);
1352             strcat(arguments, ".y");
1353             break;
1354
1355         case CHANNEL_SOURCE_Z:
1356             strcat(arguments, reg_name);
1357             strcat(arguments, ".z");
1358             break;
1359
1360         case CHANNEL_SOURCE_W:
1361             strcat(arguments, reg_name);
1362             strcat(arguments, ".w");
1363             break;
1364
1365         default:
1366             FIXME("Unhandled channel source %#x\n", channel_source);
1367             strcat(arguments, "undefined");
1368             break;
1369     }
1370
1371     if (sign_fixup) strcat(arguments, " * 2.0 - 1.0");
1372 }
1373
1374 static void shader_glsl_color_correction(const struct SHADER_OPCODE_ARG *arg, struct color_fixup_desc fixup)
1375 {
1376     unsigned int mask_size, remaining;
1377     glsl_dst_param_t dst_param;
1378     char arguments[256];
1379     DWORD mask;
1380     BOOL dummy;
1381
1382     mask = 0;
1383     if (fixup.x_sign_fixup || fixup.x_source != CHANNEL_SOURCE_X) mask |= WINED3DSP_WRITEMASK_0;
1384     if (fixup.y_sign_fixup || fixup.y_source != CHANNEL_SOURCE_Y) mask |= WINED3DSP_WRITEMASK_1;
1385     if (fixup.z_sign_fixup || fixup.z_source != CHANNEL_SOURCE_Z) mask |= WINED3DSP_WRITEMASK_2;
1386     if (fixup.w_sign_fixup || fixup.w_source != CHANNEL_SOURCE_W) mask |= WINED3DSP_WRITEMASK_3;
1387     mask &= arg->dst;
1388
1389     if (!mask) return; /* Nothing to do */
1390
1391     if (is_yuv_fixup(fixup))
1392     {
1393         enum yuv_fixup yuv_fixup = get_yuv_fixup(fixup);
1394         FIXME("YUV fixup (%#x) not supported\n", yuv_fixup);
1395         return;
1396     }
1397
1398     mask_size = shader_glsl_get_write_mask_size(mask);
1399
1400     dst_param.mask_str[0] = '\0';
1401     shader_glsl_get_write_mask(mask, dst_param.mask_str);
1402
1403     dst_param.reg_name[0] = '\0';
1404     shader_glsl_get_register_name(arg->dst, arg->dst_addr, dst_param.reg_name, &dummy, arg);
1405
1406     arguments[0] = '\0';
1407     remaining = mask_size;
1408     if (mask & WINED3DSP_WRITEMASK_0)
1409     {
1410         shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.x_sign_fixup, fixup.x_source);
1411         if (--remaining) strcat(arguments, ", ");
1412     }
1413     if (mask & WINED3DSP_WRITEMASK_1)
1414     {
1415         shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.y_sign_fixup, fixup.y_source);
1416         if (--remaining) strcat(arguments, ", ");
1417     }
1418     if (mask & WINED3DSP_WRITEMASK_2)
1419     {
1420         shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.z_sign_fixup, fixup.z_source);
1421         if (--remaining) strcat(arguments, ", ");
1422     }
1423     if (mask & WINED3DSP_WRITEMASK_3)
1424     {
1425         shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.w_sign_fixup, fixup.w_source);
1426         if (--remaining) strcat(arguments, ", ");
1427     }
1428
1429     if (mask_size > 1)
1430     {
1431         shader_addline(arg->buffer, "%s%s = vec%u(%s);\n",
1432                 dst_param.reg_name, dst_param.mask_str, mask_size, arguments);
1433     }
1434     else
1435     {
1436         shader_addline(arg->buffer, "%s%s = %s;\n", dst_param.reg_name, dst_param.mask_str, arguments);
1437     }
1438 }
1439
1440 /*****************************************************************************
1441  * 
1442  * Begin processing individual instruction opcodes
1443  * 
1444  ****************************************************************************/
1445
1446 /* Generate GLSL arithmetic functions (dst = src1 + src2) */
1447 static void shader_glsl_arith(const SHADER_OPCODE_ARG *arg)
1448 {
1449     CONST SHADER_OPCODE* curOpcode = arg->opcode;
1450     SHADER_BUFFER* buffer = arg->buffer;
1451     glsl_src_param_t src0_param;
1452     glsl_src_param_t src1_param;
1453     DWORD write_mask;
1454     char op;
1455
1456     /* Determine the GLSL operator to use based on the opcode */
1457     switch (curOpcode->opcode) {
1458         case WINED3DSIO_MUL: op = '*'; break;
1459         case WINED3DSIO_ADD: op = '+'; break;
1460         case WINED3DSIO_SUB: op = '-'; break;
1461         default:
1462             op = ' ';
1463             FIXME("Opcode %s not yet handled in GLSL\n", curOpcode->name);
1464             break;
1465     }
1466
1467     write_mask = shader_glsl_append_dst(buffer, arg);
1468     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], write_mask, &src0_param);
1469     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
1470     shader_addline(buffer, "%s %c %s);\n", src0_param.param_str, op, src1_param.param_str);
1471 }
1472
1473 /* Process the WINED3DSIO_MOV opcode using GLSL (dst = src) */
1474 static void shader_glsl_mov(const SHADER_OPCODE_ARG *arg)
1475 {
1476     SHADER_BUFFER* buffer = arg->buffer;
1477     glsl_src_param_t src0_param;
1478     DWORD write_mask;
1479
1480     write_mask = shader_glsl_append_dst(buffer, arg);
1481     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], write_mask, &src0_param);
1482
1483     /* In vs_1_1 WINED3DSIO_MOV can write to the address register. In later
1484      * shader versions WINED3DSIO_MOVA is used for this. */
1485     if ((WINED3DSHADER_VERSION_MAJOR(arg->reg_maps->shader_version) == 1
1486             && !shader_is_pshader_version(arg->reg_maps->shader_version)
1487             && shader_get_regtype(arg->dst) == WINED3DSPR_ADDR))
1488     {
1489         /* This is a simple floor() */
1490         unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
1491         if (mask_size > 1) {
1492             shader_addline(buffer, "ivec%d(floor(%s)));\n", mask_size, src0_param.param_str);
1493         } else {
1494             shader_addline(buffer, "int(floor(%s)));\n", src0_param.param_str);
1495         }
1496     } else if(arg->opcode->opcode == WINED3DSIO_MOVA) {
1497         /* We need to *round* to the nearest int here. */
1498         unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
1499         if (mask_size > 1) {
1500             shader_addline(buffer, "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s)));\n", mask_size, src0_param.param_str, mask_size, src0_param.param_str);
1501         } else {
1502             shader_addline(buffer, "int(floor(abs(%s) + 0.5) * sign(%s)));\n", src0_param.param_str, src0_param.param_str);
1503         }
1504     } else {
1505         shader_addline(buffer, "%s);\n", src0_param.param_str);
1506     }
1507 }
1508
1509 /* Process the dot product operators DP3 and DP4 in GLSL (dst = dot(src0, src1)) */
1510 static void shader_glsl_dot(const SHADER_OPCODE_ARG *arg)
1511 {
1512     CONST SHADER_OPCODE* curOpcode = arg->opcode;
1513     SHADER_BUFFER* buffer = arg->buffer;
1514     glsl_src_param_t src0_param;
1515     glsl_src_param_t src1_param;
1516     DWORD dst_write_mask, src_write_mask;
1517     unsigned int dst_size = 0;
1518
1519     dst_write_mask = shader_glsl_append_dst(buffer, arg);
1520     dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
1521
1522     /* dp3 works on vec3, dp4 on vec4 */
1523     if (curOpcode->opcode == WINED3DSIO_DP4) {
1524         src_write_mask = WINED3DSP_WRITEMASK_ALL;
1525     } else {
1526         src_write_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1527     }
1528
1529     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_write_mask, &src0_param);
1530     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], src_write_mask, &src1_param);
1531
1532     if (dst_size > 1) {
1533         shader_addline(buffer, "vec%d(dot(%s, %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
1534     } else {
1535         shader_addline(buffer, "dot(%s, %s));\n", src0_param.param_str, src1_param.param_str);
1536     }
1537 }
1538
1539 /* Note that this instruction has some restrictions. The destination write mask
1540  * can't contain the w component, and the source swizzles have to be .xyzw */
1541 static void shader_glsl_cross(const SHADER_OPCODE_ARG *arg)
1542 {
1543     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1544     glsl_src_param_t src0_param;
1545     glsl_src_param_t src1_param;
1546     char dst_mask[6];
1547
1548     shader_glsl_get_write_mask(arg->dst, dst_mask);
1549     shader_glsl_append_dst(arg->buffer, arg);
1550     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
1551     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], src_mask, &src1_param);
1552     shader_addline(arg->buffer, "cross(%s, %s)%s);\n", src0_param.param_str, src1_param.param_str, dst_mask);
1553 }
1554
1555 /* Process the WINED3DSIO_POW instruction in GLSL (dst = |src0|^src1)
1556  * Src0 and src1 are scalars. Note that D3D uses the absolute of src0, while
1557  * GLSL uses the value as-is. */
1558 static void shader_glsl_pow(const SHADER_OPCODE_ARG *arg)
1559 {
1560     SHADER_BUFFER *buffer = arg->buffer;
1561     glsl_src_param_t src0_param;
1562     glsl_src_param_t src1_param;
1563     DWORD dst_write_mask;
1564     unsigned int dst_size;
1565
1566     dst_write_mask = shader_glsl_append_dst(buffer, arg);
1567     dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
1568
1569     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
1570     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0, &src1_param);
1571
1572     if (dst_size > 1) {
1573         shader_addline(buffer, "vec%d(pow(abs(%s), %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
1574     } else {
1575         shader_addline(buffer, "pow(abs(%s), %s));\n", src0_param.param_str, src1_param.param_str);
1576     }
1577 }
1578
1579 /* Process the WINED3DSIO_LOG instruction in GLSL (dst = log2(|src0|))
1580  * Src0 is a scalar. Note that D3D uses the absolute of src0, while
1581  * GLSL uses the value as-is. */
1582 static void shader_glsl_log(const SHADER_OPCODE_ARG *arg)
1583 {
1584     SHADER_BUFFER *buffer = arg->buffer;
1585     glsl_src_param_t src0_param;
1586     DWORD dst_write_mask;
1587     unsigned int dst_size;
1588
1589     dst_write_mask = shader_glsl_append_dst(buffer, arg);
1590     dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
1591
1592     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
1593
1594     if (dst_size > 1) {
1595         shader_addline(buffer, "vec%d(log2(abs(%s))));\n", dst_size, src0_param.param_str);
1596     } else {
1597         shader_addline(buffer, "log2(abs(%s)));\n", src0_param.param_str);
1598     }
1599 }
1600
1601 /* Map the opcode 1-to-1 to the GL code (arg->dst = instruction(src0, src1, ...) */
1602 static void shader_glsl_map2gl(const SHADER_OPCODE_ARG *arg)
1603 {
1604     CONST SHADER_OPCODE* curOpcode = arg->opcode;
1605     SHADER_BUFFER* buffer = arg->buffer;
1606     glsl_src_param_t src_param;
1607     const char *instruction;
1608     char arguments[256];
1609     DWORD write_mask;
1610     unsigned i;
1611
1612     /* Determine the GLSL function to use based on the opcode */
1613     /* TODO: Possibly make this a table for faster lookups */
1614     switch (curOpcode->opcode) {
1615         case WINED3DSIO_MIN: instruction = "min"; break;
1616         case WINED3DSIO_MAX: instruction = "max"; break;
1617         case WINED3DSIO_ABS: instruction = "abs"; break;
1618         case WINED3DSIO_FRC: instruction = "fract"; break;
1619         case WINED3DSIO_NRM: instruction = "normalize"; break;
1620         case WINED3DSIO_EXP: instruction = "exp2"; break;
1621         case WINED3DSIO_SGN: instruction = "sign"; break;
1622         case WINED3DSIO_DSX: instruction = "dFdx"; break;
1623         case WINED3DSIO_DSY: instruction = "ycorrection.y * dFdy"; break;
1624         default: instruction = "";
1625             FIXME("Opcode %s not yet handled in GLSL\n", curOpcode->name);
1626             break;
1627     }
1628
1629     write_mask = shader_glsl_append_dst(buffer, arg);
1630
1631     arguments[0] = '\0';
1632     if (curOpcode->num_params > 0) {
1633         shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], write_mask, &src_param);
1634         strcat(arguments, src_param.param_str);
1635         for (i = 2; i < curOpcode->num_params; ++i) {
1636             strcat(arguments, ", ");
1637             shader_glsl_add_src_param(arg, arg->src[i-1], arg->src_addr[i-1], write_mask, &src_param);
1638             strcat(arguments, src_param.param_str);
1639         }
1640     }
1641
1642     shader_addline(buffer, "%s(%s));\n", instruction, arguments);
1643 }
1644
1645 /** Process the WINED3DSIO_EXPP instruction in GLSL:
1646  * For shader model 1.x, do the following (and honor the writemask, so use a temporary variable):
1647  *   dst.x = 2^(floor(src))
1648  *   dst.y = src - floor(src)
1649  *   dst.z = 2^src   (partial precision is allowed, but optional)
1650  *   dst.w = 1.0;
1651  * For 2.0 shaders, just do this (honoring writemask and swizzle):
1652  *   dst = 2^src;    (partial precision is allowed, but optional)
1653  */
1654 static void shader_glsl_expp(const SHADER_OPCODE_ARG *arg)
1655 {
1656     glsl_src_param_t src_param;
1657
1658     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src_param);
1659
1660     if (arg->reg_maps->shader_version < WINED3DPS_VERSION(2,0))
1661     {
1662         char dst_mask[6];
1663
1664         shader_addline(arg->buffer, "tmp0.x = exp2(floor(%s));\n", src_param.param_str);
1665         shader_addline(arg->buffer, "tmp0.y = %s - floor(%s);\n", src_param.param_str, src_param.param_str);
1666         shader_addline(arg->buffer, "tmp0.z = exp2(%s);\n", src_param.param_str);
1667         shader_addline(arg->buffer, "tmp0.w = 1.0;\n");
1668
1669         shader_glsl_append_dst(arg->buffer, arg);
1670         shader_glsl_get_write_mask(arg->dst, dst_mask);
1671         shader_addline(arg->buffer, "tmp0%s);\n", dst_mask);
1672     } else {
1673         DWORD write_mask;
1674         unsigned int mask_size;
1675
1676         write_mask = shader_glsl_append_dst(arg->buffer, arg);
1677         mask_size = shader_glsl_get_write_mask_size(write_mask);
1678
1679         if (mask_size > 1) {
1680             shader_addline(arg->buffer, "vec%d(exp2(%s)));\n", mask_size, src_param.param_str);
1681         } else {
1682             shader_addline(arg->buffer, "exp2(%s));\n", src_param.param_str);
1683         }
1684     }
1685 }
1686
1687 /** Process the RCP (reciprocal or inverse) opcode in GLSL (dst = 1 / src) */
1688 static void shader_glsl_rcp(const SHADER_OPCODE_ARG *arg)
1689 {
1690     glsl_src_param_t src_param;
1691     DWORD write_mask;
1692     unsigned int mask_size;
1693
1694     write_mask = shader_glsl_append_dst(arg->buffer, arg);
1695     mask_size = shader_glsl_get_write_mask_size(write_mask);
1696     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &src_param);
1697
1698     if (mask_size > 1) {
1699         shader_addline(arg->buffer, "vec%d(1.0 / %s));\n", mask_size, src_param.param_str);
1700     } else {
1701         shader_addline(arg->buffer, "1.0 / %s);\n", src_param.param_str);
1702     }
1703 }
1704
1705 static void shader_glsl_rsq(const SHADER_OPCODE_ARG *arg)
1706 {
1707     SHADER_BUFFER* buffer = arg->buffer;
1708     glsl_src_param_t src_param;
1709     DWORD write_mask;
1710     unsigned int mask_size;
1711
1712     write_mask = shader_glsl_append_dst(buffer, arg);
1713     mask_size = shader_glsl_get_write_mask_size(write_mask);
1714
1715     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &src_param);
1716
1717     if (mask_size > 1) {
1718         shader_addline(buffer, "vec%d(inversesqrt(%s)));\n", mask_size, src_param.param_str);
1719     } else {
1720         shader_addline(buffer, "inversesqrt(%s));\n", src_param.param_str);
1721     }
1722 }
1723
1724 /** Process signed comparison opcodes in GLSL. */
1725 static void shader_glsl_compare(const SHADER_OPCODE_ARG *arg)
1726 {
1727     glsl_src_param_t src0_param;
1728     glsl_src_param_t src1_param;
1729     DWORD write_mask;
1730     unsigned int mask_size;
1731
1732     write_mask = shader_glsl_append_dst(arg->buffer, arg);
1733     mask_size = shader_glsl_get_write_mask_size(write_mask);
1734     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], write_mask, &src0_param);
1735     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
1736
1737     if (mask_size > 1) {
1738         const char *compare;
1739
1740         switch(arg->opcode->opcode) {
1741             case WINED3DSIO_SLT: compare = "lessThan"; break;
1742             case WINED3DSIO_SGE: compare = "greaterThanEqual"; break;
1743             default: compare = "";
1744                 FIXME("Can't handle opcode %s\n", arg->opcode->name);
1745         }
1746
1747         shader_addline(arg->buffer, "vec%d(%s(%s, %s)));\n", mask_size, compare,
1748                 src0_param.param_str, src1_param.param_str);
1749     } else {
1750         switch(arg->opcode->opcode) {
1751             case WINED3DSIO_SLT:
1752                 /* Step(src0, src1) is not suitable here because if src0 == src1 SLT is supposed,
1753                  * to return 0.0 but step returns 1.0 because step is not < x
1754                  * An alternative is a bvec compare padded with an unused second component.
1755                  * step(src1 * -1.0, src0 * -1.0) is not an option because it suffers from the same
1756                  * issue. Playing with not() is not possible either because not() does not accept
1757                  * a scalar.
1758                  */
1759                 shader_addline(arg->buffer, "(%s < %s) ? 1.0 : 0.0);\n", src0_param.param_str, src1_param.param_str);
1760                 break;
1761             case WINED3DSIO_SGE:
1762                 /* Here we can use the step() function and safe a conditional */
1763                 shader_addline(arg->buffer, "step(%s, %s));\n", src1_param.param_str, src0_param.param_str);
1764                 break;
1765             default:
1766                 FIXME("Can't handle opcode %s\n", arg->opcode->name);
1767         }
1768
1769     }
1770 }
1771
1772 /** Process CMP instruction in GLSL (dst = src0 >= 0.0 ? src1 : src2), per channel */
1773 static void shader_glsl_cmp(const SHADER_OPCODE_ARG *arg)
1774 {
1775     glsl_src_param_t src0_param;
1776     glsl_src_param_t src1_param;
1777     glsl_src_param_t src2_param;
1778     DWORD write_mask, cmp_channel = 0;
1779     unsigned int i, j;
1780     char mask_char[6];
1781     BOOL temp_destination = FALSE;
1782
1783     if(shader_is_scalar(arg->src[0])) {
1784         write_mask = shader_glsl_append_dst(arg->buffer, arg);
1785
1786         shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
1787         shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
1788         shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], write_mask, &src2_param);
1789
1790         shader_addline(arg->buffer, "%s >= 0.0 ? %s : %s);\n",
1791                        src0_param.param_str, src1_param.param_str, src2_param.param_str);
1792     } else {
1793         DWORD src0reg = arg->src[0] & WINED3DSP_REGNUM_MASK;
1794         DWORD src1reg = arg->src[1] & WINED3DSP_REGNUM_MASK;
1795         DWORD src2reg = arg->src[2] & WINED3DSP_REGNUM_MASK;
1796         DWORD src0regtype = shader_get_regtype(arg->src[0]);
1797         DWORD src1regtype = shader_get_regtype(arg->src[1]);
1798         DWORD src2regtype = shader_get_regtype(arg->src[2]);
1799         DWORD dstreg = arg->dst & WINED3DSP_REGNUM_MASK;
1800         DWORD dstregtype = shader_get_regtype(arg->dst);
1801
1802         /* Cycle through all source0 channels */
1803         for (i=0; i<4; i++) {
1804             write_mask = 0;
1805             /* Find the destination channels which use the current source0 channel */
1806             for (j=0; j<4; j++) {
1807                 if ( ((arg->src[0] >> (WINED3DSP_SWIZZLE_SHIFT + 2*j)) & 0x3) == i ) {
1808                     write_mask |= WINED3DSP_WRITEMASK_0 << j;
1809                     cmp_channel = WINED3DSP_WRITEMASK_0 << j;
1810                 }
1811             }
1812
1813             /* Splitting the cmp instruction up in multiple lines imposes a problem:
1814             * The first lines may overwrite source parameters of the following lines.
1815             * Deal with that by using a temporary destination register if needed
1816             */
1817             if((src0reg == dstreg && src0regtype == dstregtype) ||
1818             (src1reg == dstreg && src1regtype == dstregtype) ||
1819             (src2reg == dstreg && src2regtype == dstregtype)) {
1820
1821                 write_mask = shader_glsl_get_write_mask(arg->dst & (~WINED3DSP_SWIZZLE_MASK | write_mask), mask_char);
1822                 if (!write_mask) continue;
1823                 shader_addline(arg->buffer, "tmp0%s = (", mask_char);
1824                 temp_destination = TRUE;
1825             } else {
1826                 write_mask = shader_glsl_append_dst_ext(arg->buffer, arg, arg->dst & (~WINED3DSP_SWIZZLE_MASK | write_mask));
1827                 if (!write_mask) continue;
1828             }
1829
1830             shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], cmp_channel, &src0_param);
1831             shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
1832             shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], write_mask, &src2_param);
1833
1834             shader_addline(arg->buffer, "%s >= 0.0 ? %s : %s);\n",
1835                         src0_param.param_str, src1_param.param_str, src2_param.param_str);
1836         }
1837
1838         if(temp_destination) {
1839             shader_glsl_get_write_mask(arg->dst, mask_char);
1840             shader_glsl_append_dst_ext(arg->buffer, arg, arg->dst);
1841             shader_addline(arg->buffer, "tmp0%s);\n", mask_char);
1842         }
1843     }
1844
1845 }
1846
1847 /** Process the CND opcode in GLSL (dst = (src0 > 0.5) ? src1 : src2) */
1848 /* For ps 1.1-1.3, only a single component of src0 is used. For ps 1.4
1849  * the compare is done per component of src0. */
1850 static void shader_glsl_cnd(const SHADER_OPCODE_ARG *arg)
1851 {
1852     glsl_src_param_t src0_param;
1853     glsl_src_param_t src1_param;
1854     glsl_src_param_t src2_param;
1855     DWORD write_mask, cmp_channel = 0;
1856     unsigned int i, j;
1857
1858     if (arg->reg_maps->shader_version < WINED3DPS_VERSION(1, 4))
1859     {
1860         write_mask = shader_glsl_append_dst(arg->buffer, arg);
1861         shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
1862         shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
1863         shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], write_mask, &src2_param);
1864
1865         /* Fun: The D3DSI_COISSUE flag changes the semantic of the cnd instruction for < 1.4 shaders */
1866         if(arg->opcode_token & WINED3DSI_COISSUE) {
1867             shader_addline(arg->buffer, "%s /* COISSUE! */);\n", src1_param.param_str);
1868         } else {
1869             shader_addline(arg->buffer, "%s > 0.5 ? %s : %s);\n",
1870                     src0_param.param_str, src1_param.param_str, src2_param.param_str);
1871         }
1872         return;
1873     }
1874     /* Cycle through all source0 channels */
1875     for (i=0; i<4; i++) {
1876         write_mask = 0;
1877         /* Find the destination channels which use the current source0 channel */
1878         for (j=0; j<4; j++) {
1879             if ( ((arg->src[0] >> (WINED3DSP_SWIZZLE_SHIFT + 2*j)) & 0x3) == i ) {
1880                 write_mask |= WINED3DSP_WRITEMASK_0 << j;
1881                 cmp_channel = WINED3DSP_WRITEMASK_0 << j;
1882             }
1883         }
1884         write_mask = shader_glsl_append_dst_ext(arg->buffer, arg, arg->dst & (~WINED3DSP_SWIZZLE_MASK | write_mask));
1885         if (!write_mask) continue;
1886
1887         shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], cmp_channel, &src0_param);
1888         shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
1889         shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], write_mask, &src2_param);
1890
1891         shader_addline(arg->buffer, "%s > 0.5 ? %s : %s);\n",
1892                 src0_param.param_str, src1_param.param_str, src2_param.param_str);
1893     }
1894 }
1895
1896 /** GLSL code generation for WINED3DSIO_MAD: Multiply the first 2 opcodes, then add the last */
1897 static void shader_glsl_mad(const SHADER_OPCODE_ARG *arg)
1898 {
1899     glsl_src_param_t src0_param;
1900     glsl_src_param_t src1_param;
1901     glsl_src_param_t src2_param;
1902     DWORD write_mask;
1903
1904     write_mask = shader_glsl_append_dst(arg->buffer, arg);
1905     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], write_mask, &src0_param);
1906     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
1907     shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], write_mask, &src2_param);
1908     shader_addline(arg->buffer, "(%s * %s) + %s);\n",
1909             src0_param.param_str, src1_param.param_str, src2_param.param_str);
1910 }
1911
1912 /** Handles transforming all WINED3DSIO_M?x? opcodes for 
1913     Vertex shaders to GLSL codes */
1914 static void shader_glsl_mnxn(const SHADER_OPCODE_ARG *arg)
1915 {
1916     IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)arg->shader;
1917     const SHADER_OPCODE *opcode_table = shader->baseShader.shader_ins;
1918     DWORD shader_version = arg->reg_maps->shader_version;
1919     int i;
1920     int nComponents = 0;
1921     SHADER_OPCODE_ARG tmpArg;
1922    
1923     memset(&tmpArg, 0, sizeof(SHADER_OPCODE_ARG));
1924
1925     /* Set constants for the temporary argument */
1926     tmpArg.shader      = arg->shader;
1927     tmpArg.buffer      = arg->buffer;
1928     tmpArg.src[0]      = arg->src[0];
1929     tmpArg.src_addr[0] = arg->src_addr[0];
1930     tmpArg.src_addr[1] = arg->src_addr[1];
1931     tmpArg.reg_maps = arg->reg_maps; 
1932     
1933     switch(arg->opcode->opcode) {
1934         case WINED3DSIO_M4x4:
1935             nComponents = 4;
1936             tmpArg.opcode = shader_get_opcode(opcode_table, shader_version, WINED3DSIO_DP4);
1937             break;
1938         case WINED3DSIO_M4x3:
1939             nComponents = 3;
1940             tmpArg.opcode = shader_get_opcode(opcode_table, shader_version, WINED3DSIO_DP4);
1941             break;
1942         case WINED3DSIO_M3x4:
1943             nComponents = 4;
1944             tmpArg.opcode = shader_get_opcode(opcode_table, shader_version, WINED3DSIO_DP3);
1945             break;
1946         case WINED3DSIO_M3x3:
1947             nComponents = 3;
1948             tmpArg.opcode = shader_get_opcode(opcode_table, shader_version, WINED3DSIO_DP3);
1949             break;
1950         case WINED3DSIO_M3x2:
1951             nComponents = 2;
1952             tmpArg.opcode = shader_get_opcode(opcode_table, shader_version, WINED3DSIO_DP3);
1953             break;
1954         default:
1955             break;
1956     }
1957
1958     for (i = 0; i < nComponents; i++) {
1959         tmpArg.dst = ((arg->dst) & ~WINED3DSP_WRITEMASK_ALL)|(WINED3DSP_WRITEMASK_0<<i);
1960         tmpArg.src[1]      = arg->src[1]+i;
1961         shader_glsl_dot(&tmpArg);
1962     }
1963 }
1964
1965 /**
1966     The LRP instruction performs a component-wise linear interpolation 
1967     between the second and third operands using the first operand as the
1968     blend factor.  Equation:  (dst = src2 + src0 * (src1 - src2))
1969     This is equivalent to mix(src2, src1, src0);
1970 */
1971 static void shader_glsl_lrp(const SHADER_OPCODE_ARG *arg)
1972 {
1973     glsl_src_param_t src0_param;
1974     glsl_src_param_t src1_param;
1975     glsl_src_param_t src2_param;
1976     DWORD write_mask;
1977
1978     write_mask = shader_glsl_append_dst(arg->buffer, arg);
1979
1980     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], write_mask, &src0_param);
1981     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
1982     shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], write_mask, &src2_param);
1983
1984     shader_addline(arg->buffer, "mix(%s, %s, %s));\n",
1985             src2_param.param_str, src1_param.param_str, src0_param.param_str);
1986 }
1987
1988 /** Process the WINED3DSIO_LIT instruction in GLSL:
1989  * dst.x = dst.w = 1.0
1990  * dst.y = (src0.x > 0) ? src0.x
1991  * dst.z = (src0.x > 0) ? ((src0.y > 0) ? pow(src0.y, src.w) : 0) : 0
1992  *                                        where src.w is clamped at +- 128
1993  */
1994 static void shader_glsl_lit(const SHADER_OPCODE_ARG *arg)
1995 {
1996     glsl_src_param_t src0_param;
1997     glsl_src_param_t src1_param;
1998     glsl_src_param_t src3_param;
1999     char dst_mask[6];
2000
2001     shader_glsl_append_dst(arg->buffer, arg);
2002     shader_glsl_get_write_mask(arg->dst, dst_mask);
2003
2004     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
2005     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_1, &src1_param);
2006     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &src3_param);
2007
2008     /* The sdk specifies the instruction like this
2009      * dst.x = 1.0;
2010      * if(src.x > 0.0) dst.y = src.x
2011      * else dst.y = 0.0.
2012      * if(src.x > 0.0 && src.y > 0.0) dst.z = pow(src.y, power);
2013      * else dst.z = 0.0;
2014      * dst.w = 1.0;
2015      *
2016      * Obviously that has quite a few conditionals in it which we don't like. So the first step is this:
2017      * dst.x = 1.0                                  ... No further explanation needed
2018      * dst.y = max(src.y, 0.0);                     ... If x < 0.0, use 0.0, otherwise x. Same as the conditional
2019      * dst.z = x > 0.0 ? pow(max(y, 0.0), p) : 0;   ... 0 ^ power is 0, and otherwise we use y anyway
2020      * dst.w = 1.0.                                 ... Nothing fancy.
2021      *
2022      * So we still have one conditional in there. So do this:
2023      * dst.z = pow(max(0.0, src.y) * step(0.0, src.x), power);
2024      *
2025      * step(0.0, x) will return 1 if src.x > 0.0, and 0 otherwise. So if y is 0 we get pow(0.0 * 1.0, power),
2026      * which sets dst.z to 0. If y > 0, but x = 0.0, we get pow(y * 0.0, power), which results in 0 too.
2027      * if both x and y are > 0, we get pow(y * 1.0, power), as it is supposed to
2028      */
2029     shader_addline(arg->buffer, "vec4(1.0, max(%s, 0.0), pow(max(0.0, %s) * step(0.0, %s), clamp(%s, -128.0, 128.0)), 1.0)%s);\n",
2030                    src0_param.param_str, src1_param.param_str, src0_param.param_str, src3_param.param_str, dst_mask);
2031 }
2032
2033 /** Process the WINED3DSIO_DST instruction in GLSL:
2034  * dst.x = 1.0
2035  * dst.y = src0.x * src0.y
2036  * dst.z = src0.z
2037  * dst.w = src1.w
2038  */
2039 static void shader_glsl_dst(const SHADER_OPCODE_ARG *arg)
2040 {
2041     glsl_src_param_t src0y_param;
2042     glsl_src_param_t src0z_param;
2043     glsl_src_param_t src1y_param;
2044     glsl_src_param_t src1w_param;
2045     char dst_mask[6];
2046
2047     shader_glsl_append_dst(arg->buffer, arg);
2048     shader_glsl_get_write_mask(arg->dst, dst_mask);
2049
2050     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_1, &src0y_param);
2051     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_2, &src0z_param);
2052     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_1, &src1y_param);
2053     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_3, &src1w_param);
2054
2055     shader_addline(arg->buffer, "vec4(1.0, %s * %s, %s, %s))%s;\n",
2056             src0y_param.param_str, src1y_param.param_str, src0z_param.param_str, src1w_param.param_str, dst_mask);
2057 }
2058
2059 /** Process the WINED3DSIO_SINCOS instruction in GLSL:
2060  * VS 2.0 requires that specific cosine and sine constants be passed to this instruction so the hardware
2061  * can handle it.  But, these functions are built-in for GLSL, so we can just ignore the last 2 params.
2062  * 
2063  * dst.x = cos(src0.?)
2064  * dst.y = sin(src0.?)
2065  * dst.z = dst.z
2066  * dst.w = dst.w
2067  */
2068 static void shader_glsl_sincos(const SHADER_OPCODE_ARG *arg)
2069 {
2070     glsl_src_param_t src0_param;
2071     DWORD write_mask;
2072
2073     write_mask = shader_glsl_append_dst(arg->buffer, arg);
2074     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
2075
2076     switch (write_mask) {
2077         case WINED3DSP_WRITEMASK_0:
2078             shader_addline(arg->buffer, "cos(%s));\n", src0_param.param_str);
2079             break;
2080
2081         case WINED3DSP_WRITEMASK_1:
2082             shader_addline(arg->buffer, "sin(%s));\n", src0_param.param_str);
2083             break;
2084
2085         case (WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1):
2086             shader_addline(arg->buffer, "vec2(cos(%s), sin(%s)));\n", src0_param.param_str, src0_param.param_str);
2087             break;
2088
2089         default:
2090             ERR("Write mask should be .x, .y or .xy\n");
2091             break;
2092     }
2093 }
2094
2095 /** Process the WINED3DSIO_LOOP instruction in GLSL:
2096  * Start a for() loop where src1.y is the initial value of aL,
2097  *  increment aL by src1.z for a total of src1.x iterations.
2098  *  Need to use a temporary variable for this operation.
2099  */
2100 /* FIXME: I don't think nested loops will work correctly this way. */
2101 static void shader_glsl_loop(const SHADER_OPCODE_ARG *arg)
2102 {
2103     glsl_src_param_t src1_param;
2104     IWineD3DBaseShaderImpl* shader = (IWineD3DBaseShaderImpl*) arg->shader;
2105     DWORD regtype = shader_get_regtype(arg->src[1]);
2106     DWORD reg = arg->src[1] & WINED3DSP_REGNUM_MASK;
2107     const DWORD *control_values = NULL;
2108     const local_constant *constant;
2109
2110     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_ALL, &src1_param);
2111
2112     /* Try to hardcode the loop control parameters if possible. Direct3D 9 class hardware doesn't support real
2113      * varying indexing, but Microsoft designed this feature for Shader model 2.x+. If the loop control is
2114      * known at compile time, the GLSL compiler can unroll the loop, and replace indirect addressing with direct
2115      * addressing.
2116      */
2117     if(regtype == WINED3DSPR_CONSTINT) {
2118         LIST_FOR_EACH_ENTRY(constant, &shader->baseShader.constantsI, local_constant, entry) {
2119             if(constant->idx == reg) {
2120                 control_values = constant->value;
2121                 break;
2122             }
2123         }
2124     }
2125
2126     if(control_values) {
2127         if(control_values[2] > 0) {
2128             shader_addline(arg->buffer, "for (aL%u = %d; aL%u < (%d * %d + %d); aL%u += %d) {\n",
2129                            shader->baseShader.cur_loop_depth, control_values[1],
2130                            shader->baseShader.cur_loop_depth, control_values[0], control_values[2], control_values[1],
2131                            shader->baseShader.cur_loop_depth, control_values[2]);
2132         } else if(control_values[2] == 0) {
2133             shader_addline(arg->buffer, "for (aL%u = %d, tmpInt%u = 0; tmpInt%u < %d; tmpInt%u++) {\n",
2134                            shader->baseShader.cur_loop_depth, control_values[1], shader->baseShader.cur_loop_depth,
2135                            shader->baseShader.cur_loop_depth, control_values[0],
2136                            shader->baseShader.cur_loop_depth);
2137         } else {
2138             shader_addline(arg->buffer, "for (aL%u = %d; aL%u > (%d * %d + %d); aL%u += %d) {\n",
2139                            shader->baseShader.cur_loop_depth, control_values[1],
2140                            shader->baseShader.cur_loop_depth, control_values[0], control_values[2], control_values[1],
2141                            shader->baseShader.cur_loop_depth, control_values[2]);
2142         }
2143     } else {
2144         shader_addline(arg->buffer, "for (tmpInt%u = 0, aL%u = %s.y; tmpInt%u < %s.x; tmpInt%u++, aL%u += %s.z) {\n",
2145                        shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_regno,
2146                        src1_param.reg_name, shader->baseShader.cur_loop_depth, src1_param.reg_name,
2147                        shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_regno, src1_param.reg_name);
2148     }
2149
2150     shader->baseShader.cur_loop_depth++;
2151     shader->baseShader.cur_loop_regno++;
2152 }
2153
2154 static void shader_glsl_end(const SHADER_OPCODE_ARG *arg)
2155 {
2156     IWineD3DBaseShaderImpl* shader = (IWineD3DBaseShaderImpl*) arg->shader;
2157
2158     shader_addline(arg->buffer, "}\n");
2159
2160     if(arg->opcode->opcode == WINED3DSIO_ENDLOOP) {
2161         shader->baseShader.cur_loop_depth--;
2162         shader->baseShader.cur_loop_regno--;
2163     }
2164     if(arg->opcode->opcode == WINED3DSIO_ENDREP) {
2165         shader->baseShader.cur_loop_depth--;
2166     }
2167 }
2168
2169 static void shader_glsl_rep(const SHADER_OPCODE_ARG *arg)
2170 {
2171     IWineD3DBaseShaderImpl* shader = (IWineD3DBaseShaderImpl*) arg->shader;
2172     glsl_src_param_t src0_param;
2173
2174     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
2175     shader_addline(arg->buffer, "for (tmpInt%d = 0; tmpInt%d < %s; tmpInt%d++) {\n",
2176                    shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_depth,
2177                    src0_param.param_str, shader->baseShader.cur_loop_depth);
2178     shader->baseShader.cur_loop_depth++;
2179 }
2180
2181 static void shader_glsl_if(const SHADER_OPCODE_ARG *arg)
2182 {
2183     glsl_src_param_t src0_param;
2184
2185     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
2186     shader_addline(arg->buffer, "if (%s) {\n", src0_param.param_str);
2187 }
2188
2189 static void shader_glsl_ifc(const SHADER_OPCODE_ARG *arg)
2190 {
2191     glsl_src_param_t src0_param;
2192     glsl_src_param_t src1_param;
2193
2194     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
2195     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0, &src1_param);
2196
2197     shader_addline(arg->buffer, "if (%s %s %s) {\n",
2198             src0_param.param_str, shader_get_comp_op(arg->opcode_token), src1_param.param_str);
2199 }
2200
2201 static void shader_glsl_else(const SHADER_OPCODE_ARG *arg)
2202 {
2203     shader_addline(arg->buffer, "} else {\n");
2204 }
2205
2206 static void shader_glsl_break(const SHADER_OPCODE_ARG *arg)
2207 {
2208     shader_addline(arg->buffer, "break;\n");
2209 }
2210
2211 /* FIXME: According to MSDN the compare is done per component. */
2212 static void shader_glsl_breakc(const SHADER_OPCODE_ARG *arg)
2213 {
2214     glsl_src_param_t src0_param;
2215     glsl_src_param_t src1_param;
2216
2217     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
2218     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0, &src1_param);
2219
2220     shader_addline(arg->buffer, "if (%s %s %s) break;\n",
2221             src0_param.param_str, shader_get_comp_op(arg->opcode_token), src1_param.param_str);
2222 }
2223
2224 static void shader_glsl_label(const SHADER_OPCODE_ARG *arg)
2225 {
2226
2227     DWORD snum = (arg->src[0]) & WINED3DSP_REGNUM_MASK;
2228     shader_addline(arg->buffer, "}\n");
2229     shader_addline(arg->buffer, "void subroutine%u () {\n",  snum);
2230 }
2231
2232 static void shader_glsl_call(const SHADER_OPCODE_ARG *arg)
2233 {
2234     DWORD snum = (arg->src[0]) & WINED3DSP_REGNUM_MASK;
2235     shader_addline(arg->buffer, "subroutine%u();\n", snum);
2236 }
2237
2238 static void shader_glsl_callnz(const SHADER_OPCODE_ARG *arg)
2239 {
2240     glsl_src_param_t src1_param;
2241
2242     DWORD snum = (arg->src[0]) & WINED3DSP_REGNUM_MASK;
2243     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0, &src1_param);
2244     shader_addline(arg->buffer, "if (%s) subroutine%u();\n", src1_param.param_str, snum);
2245 }
2246
2247 /*********************************************
2248  * Pixel Shader Specific Code begins here
2249  ********************************************/
2250 static void pshader_glsl_tex(const SHADER_OPCODE_ARG *arg)
2251 {
2252     IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
2253     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
2254     DWORD shader_version = arg->reg_maps->shader_version;
2255     char dst_swizzle[6];
2256     glsl_sample_function_t sample_function;
2257     DWORD sampler_type;
2258     DWORD sampler_idx;
2259     BOOL projected, texrect = FALSE;
2260     DWORD mask = 0;
2261
2262     /* All versions have a destination register */
2263     shader_glsl_append_dst(arg->buffer, arg);
2264
2265     /* 1.0-1.4: Use destination register as sampler source.
2266      * 2.0+: Use provided sampler source. */
2267     if (shader_version < WINED3DPS_VERSION(2,0)) sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2268     else sampler_idx = arg->src[1] & WINED3DSP_REGNUM_MASK;
2269     sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
2270
2271     if (shader_version < WINED3DPS_VERSION(1,4))
2272     {
2273         DWORD flags = deviceImpl->stateBlock->textureState[sampler_idx][WINED3DTSS_TEXTURETRANSFORMFLAGS];
2274
2275         /* Projected cube textures don't make a lot of sense, the resulting coordinates stay the same. */
2276         if (flags & WINED3DTTFF_PROJECTED && sampler_type != WINED3DSTT_CUBE) {
2277             projected = TRUE;
2278             switch (flags & ~WINED3DTTFF_PROJECTED) {
2279                 case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
2280                 case WINED3DTTFF_COUNT2: mask = WINED3DSP_WRITEMASK_1; break;
2281                 case WINED3DTTFF_COUNT3: mask = WINED3DSP_WRITEMASK_2; break;
2282                 case WINED3DTTFF_COUNT4:
2283                 case WINED3DTTFF_DISABLE: mask = WINED3DSP_WRITEMASK_3; break;
2284             }
2285         } else {
2286             projected = FALSE;
2287         }
2288     }
2289     else if (shader_version < WINED3DPS_VERSION(2,0))
2290     {
2291         DWORD src_mod = arg->src[0] & WINED3DSP_SRCMOD_MASK;
2292
2293         if (src_mod == WINED3DSPSM_DZ) {
2294             projected = TRUE;
2295             mask = WINED3DSP_WRITEMASK_2;
2296         } else if (src_mod == WINED3DSPSM_DW) {
2297             projected = TRUE;
2298             mask = WINED3DSP_WRITEMASK_3;
2299         } else {
2300             projected = FALSE;
2301         }
2302     } else {
2303         if(arg->opcode_token & WINED3DSI_TEXLD_PROJECT) {
2304                 /* ps 2.0 texldp instruction always divides by the fourth component. */
2305                 projected = TRUE;
2306                 mask = WINED3DSP_WRITEMASK_3;
2307         } else {
2308             projected = FALSE;
2309         }
2310     }
2311
2312     if(deviceImpl->stateBlock->textures[sampler_idx] &&
2313        IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
2314         texrect = TRUE;
2315     }
2316
2317     shader_glsl_get_sample_function(sampler_type, projected, texrect, &sample_function);
2318     mask |= sample_function.coord_mask;
2319
2320     if (shader_version < WINED3DPS_VERSION(2,0)) shader_glsl_get_write_mask(arg->dst, dst_swizzle);
2321     else shader_glsl_get_swizzle(arg->src[1], FALSE, arg->dst, dst_swizzle);
2322
2323     /* 1.0-1.3: Use destination register as coordinate source.
2324        1.4+: Use provided coordinate source register. */
2325     if (shader_version < WINED3DPS_VERSION(1,4))
2326     {
2327         char coord_mask[6];
2328         shader_glsl_get_write_mask(mask, coord_mask);
2329         shader_addline(arg->buffer, "%s(Psampler%u, T%u%s)%s);\n",
2330                 sample_function.name, sampler_idx, sampler_idx, coord_mask, dst_swizzle);
2331     } else {
2332         glsl_src_param_t coord_param;
2333         shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], mask, &coord_param);
2334         if(arg->opcode_token & WINED3DSI_TEXLD_BIAS) {
2335             glsl_src_param_t bias;
2336             shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &bias);
2337
2338             shader_addline(arg->buffer, "%s(Psampler%u, %s, %s)%s);\n",
2339                     sample_function.name, sampler_idx, coord_param.param_str,
2340                     bias.param_str, dst_swizzle);
2341         } else {
2342             shader_addline(arg->buffer, "%s(Psampler%u, %s)%s);\n",
2343                     sample_function.name, sampler_idx, coord_param.param_str, dst_swizzle);
2344         }
2345     }
2346 }
2347
2348 static void shader_glsl_texldl(const SHADER_OPCODE_ARG *arg)
2349 {
2350     IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*)arg->shader;
2351     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
2352     glsl_sample_function_t sample_function;
2353     glsl_src_param_t coord_param, lod_param;
2354     char dst_swizzle[6];
2355     DWORD sampler_type;
2356     DWORD sampler_idx;
2357     BOOL texrect = FALSE;
2358
2359     shader_glsl_append_dst(arg->buffer, arg);
2360     shader_glsl_get_swizzle(arg->src[1], FALSE, arg->dst, dst_swizzle);
2361
2362     sampler_idx = arg->src[1] & WINED3DSP_REGNUM_MASK;
2363     sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
2364     if(deviceImpl->stateBlock->textures[sampler_idx] &&
2365        IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
2366         texrect = TRUE;
2367     }
2368     shader_glsl_get_sample_function(sampler_type, FALSE, texrect, &sample_function);    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], sample_function.coord_mask, &coord_param);
2369
2370     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &lod_param);
2371
2372     if (shader_is_pshader_version(arg->reg_maps->shader_version))
2373     {
2374         /* The GLSL spec claims the Lod sampling functions are only supported in vertex shaders.
2375          * However, they seem to work just fine in fragment shaders as well. */
2376         WARN("Using %sLod in fragment shader.\n", sample_function.name);
2377         shader_addline(arg->buffer, "%sLod(Psampler%u, %s, %s)%s);\n",
2378                 sample_function.name, sampler_idx, coord_param.param_str, lod_param.param_str, dst_swizzle);
2379     } else {
2380         shader_addline(arg->buffer, "%sLod(Vsampler%u, %s, %s)%s);\n",
2381                 sample_function.name, sampler_idx, coord_param.param_str, lod_param.param_str, dst_swizzle);
2382     }
2383 }
2384
2385 static void pshader_glsl_texcoord(const SHADER_OPCODE_ARG *arg)
2386 {
2387     /* FIXME: Make this work for more than just 2D textures */
2388     SHADER_BUFFER* buffer = arg->buffer;
2389     DWORD write_mask;
2390     char dst_mask[6];
2391
2392     write_mask = shader_glsl_append_dst(arg->buffer, arg);
2393     shader_glsl_get_write_mask(write_mask, dst_mask);
2394
2395     if (arg->reg_maps->shader_version != WINED3DPS_VERSION(1,4))
2396     {
2397         DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2398         shader_addline(buffer, "clamp(gl_TexCoord[%u], 0.0, 1.0)%s);\n", reg, dst_mask);
2399     } else {
2400         DWORD reg = arg->src[0] & WINED3DSP_REGNUM_MASK;
2401         DWORD src_mod = arg->src[0] & WINED3DSP_SRCMOD_MASK;
2402         char dst_swizzle[6];
2403
2404         shader_glsl_get_swizzle(arg->src[0], FALSE, write_mask, dst_swizzle);
2405
2406         if (src_mod == WINED3DSPSM_DZ) {
2407             glsl_src_param_t div_param;
2408             unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
2409             shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_2, &div_param);
2410
2411             if (mask_size > 1) {
2412                 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
2413             } else {
2414                 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
2415             }
2416         } else if (src_mod == WINED3DSPSM_DW) {
2417             glsl_src_param_t div_param;
2418             unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
2419             shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &div_param);
2420
2421             if (mask_size > 1) {
2422                 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
2423             } else {
2424                 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
2425             }
2426         } else {
2427             shader_addline(buffer, "gl_TexCoord[%u]%s);\n", reg, dst_swizzle);
2428         }
2429     }
2430 }
2431
2432 /** Process the WINED3DSIO_TEXDP3TEX instruction in GLSL:
2433  * Take a 3-component dot product of the TexCoord[dstreg] and src,
2434  * then perform a 1D texture lookup from stage dstregnum, place into dst. */
2435 static void pshader_glsl_texdp3tex(const SHADER_OPCODE_ARG *arg)
2436 {
2437     glsl_src_param_t src0_param;
2438     char dst_mask[6];
2439     glsl_sample_function_t sample_function;
2440     DWORD sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2441     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2442     DWORD sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
2443
2444     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2445
2446     shader_glsl_append_dst(arg->buffer, arg);
2447     shader_glsl_get_write_mask(arg->dst, dst_mask);
2448
2449     /* Do I have to take care about the projected bit? I don't think so, since the dp3 returns only one
2450      * scalar, and projected sampling would require 4.
2451      *
2452      * It is a dependent read - not valid with conditional NP2 textures
2453      */
2454     shader_glsl_get_sample_function(sampler_type, FALSE, FALSE, &sample_function);
2455
2456     switch(count_bits(sample_function.coord_mask)) {
2457         case 1:
2458             shader_addline(arg->buffer, "%s(Psampler%u, dot(gl_TexCoord[%u].xyz, %s))%s);\n",
2459                            sample_function.name, sampler_idx, sampler_idx, src0_param.param_str, dst_mask);
2460             break;
2461
2462         case 2:
2463             shader_addline(arg->buffer, "%s(Psampler%u, vec2(dot(gl_TexCoord[%u].xyz, %s), 0.0))%s);\n",
2464                           sample_function.name, sampler_idx, sampler_idx, src0_param.param_str, dst_mask);
2465             break;
2466
2467         case 3:
2468             shader_addline(arg->buffer, "%s(Psampler%u, vec3(dot(gl_TexCoord[%u].xyz, %s), 0.0, 0.0))%s);\n",
2469                            sample_function.name, sampler_idx, sampler_idx, src0_param.param_str, dst_mask);
2470             break;
2471         default:
2472             FIXME("Unexpected mask bitcount %d\n", count_bits(sample_function.coord_mask));
2473     }
2474 }
2475
2476 /** Process the WINED3DSIO_TEXDP3 instruction in GLSL:
2477  * Take a 3-component dot product of the TexCoord[dstreg] and src. */
2478 static void pshader_glsl_texdp3(const SHADER_OPCODE_ARG *arg)
2479 {
2480     glsl_src_param_t src0_param;
2481     DWORD dstreg = arg->dst & WINED3DSP_REGNUM_MASK;
2482     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2483     DWORD dst_mask;
2484     unsigned int mask_size;
2485
2486     dst_mask = shader_glsl_append_dst(arg->buffer, arg);
2487     mask_size = shader_glsl_get_write_mask_size(dst_mask);
2488     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2489
2490     if (mask_size > 1) {
2491         shader_addline(arg->buffer, "vec%d(dot(T%u.xyz, %s)));\n", mask_size, dstreg, src0_param.param_str);
2492     } else {
2493         shader_addline(arg->buffer, "dot(T%u.xyz, %s));\n", dstreg, src0_param.param_str);
2494     }
2495 }
2496
2497 /** Process the WINED3DSIO_TEXDEPTH instruction in GLSL:
2498  * Calculate the depth as dst.x / dst.y   */
2499 static void pshader_glsl_texdepth(const SHADER_OPCODE_ARG *arg)
2500 {
2501     glsl_dst_param_t dst_param;
2502
2503     shader_glsl_add_dst_param(arg, arg->dst, 0, &dst_param);
2504
2505     /* Tests show that texdepth never returns anything below 0.0, and that r5.y is clamped to 1.0.
2506      * Negative input is accepted, -0.25 / -0.5 returns 0.5. GL should clamp gl_FragDepth to [0;1], but
2507      * this doesn't always work, so clamp the results manually. Whether or not the x value is clamped at 1
2508      * too is irrelevant, since if x = 0, any y value < 1.0 (and > 1.0 is not allowed) results in a result
2509      * >= 1.0 or < 0.0
2510      */
2511     shader_addline(arg->buffer, "gl_FragDepth = clamp((%s.x / min(%s.y, 1.0)), 0.0, 1.0);\n", dst_param.reg_name, dst_param.reg_name);
2512 }
2513
2514 /** Process the WINED3DSIO_TEXM3X2DEPTH instruction in GLSL:
2515  * Last row of a 3x2 matrix multiply, use the result to calculate the depth:
2516  * Calculate tmp0.y = TexCoord[dstreg] . src.xyz;  (tmp0.x has already been calculated)
2517  * depth = (tmp0.y == 0.0) ? 1.0 : tmp0.x / tmp0.y
2518  */
2519 static void pshader_glsl_texm3x2depth(const SHADER_OPCODE_ARG *arg)
2520 {
2521     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2522     DWORD dstreg = arg->dst & WINED3DSP_REGNUM_MASK;
2523     glsl_src_param_t src0_param;
2524
2525     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2526
2527     shader_addline(arg->buffer, "tmp0.y = dot(T%u.xyz, %s);\n", dstreg, src0_param.param_str);
2528     shader_addline(arg->buffer, "gl_FragDepth = (tmp0.y == 0.0) ? 1.0 : clamp(tmp0.x / tmp0.y, 0.0, 1.0);\n");
2529 }
2530
2531 /** Process the WINED3DSIO_TEXM3X2PAD instruction in GLSL
2532  * Calculate the 1st of a 2-row matrix multiplication. */
2533 static void pshader_glsl_texm3x2pad(const SHADER_OPCODE_ARG *arg)
2534 {
2535     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2536     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2537     SHADER_BUFFER* buffer = arg->buffer;
2538     glsl_src_param_t src0_param;
2539
2540     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2541     shader_addline(buffer, "tmp0.x = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
2542 }
2543
2544 /** Process the WINED3DSIO_TEXM3X3PAD instruction in GLSL
2545  * Calculate the 1st or 2nd row of a 3-row matrix multiplication. */
2546 static void pshader_glsl_texm3x3pad(const SHADER_OPCODE_ARG* arg)
2547 {
2548     IWineD3DPixelShaderImpl* shader = (IWineD3DPixelShaderImpl*) arg->shader;
2549     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2550     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2551     SHADER_BUFFER* buffer = arg->buffer;
2552     SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
2553     glsl_src_param_t src0_param;
2554
2555     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2556     shader_addline(buffer, "tmp0.%c = dot(T%u.xyz, %s);\n", 'x' + current_state->current_row, reg, src0_param.param_str);
2557     current_state->texcoord_w[current_state->current_row++] = reg;
2558 }
2559
2560 static void pshader_glsl_texm3x2tex(const SHADER_OPCODE_ARG *arg)
2561 {
2562     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2563     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2564     SHADER_BUFFER* buffer = arg->buffer;
2565     glsl_src_param_t src0_param;
2566     char dst_mask[6];
2567
2568     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2569     shader_addline(buffer, "tmp0.y = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
2570
2571     shader_glsl_append_dst(buffer, arg);
2572     shader_glsl_get_write_mask(arg->dst, dst_mask);
2573
2574     /* Sample the texture using the calculated coordinates */
2575     shader_addline(buffer, "texture2D(Psampler%u, tmp0.xy)%s);\n", reg, dst_mask);
2576 }
2577
2578 /** Process the WINED3DSIO_TEXM3X3TEX instruction in GLSL
2579  * Perform the 3rd row of a 3x3 matrix multiply, then sample the texture using the calculated coordinates */
2580 static void pshader_glsl_texm3x3tex(const SHADER_OPCODE_ARG *arg)
2581 {
2582     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2583     glsl_src_param_t src0_param;
2584     char dst_mask[6];
2585     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2586     IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
2587     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
2588     DWORD sampler_type = arg->reg_maps->samplers[reg] & WINED3DSP_TEXTURETYPE_MASK;
2589     glsl_sample_function_t sample_function;
2590
2591     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2592     shader_addline(arg->buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
2593
2594     shader_glsl_append_dst(arg->buffer, arg);
2595     shader_glsl_get_write_mask(arg->dst, dst_mask);
2596     /* Dependent read, not valid with conditional NP2 */
2597     shader_glsl_get_sample_function(sampler_type, FALSE, FALSE, &sample_function);
2598
2599     /* Sample the texture using the calculated coordinates */
2600     shader_addline(arg->buffer, "%s(Psampler%u, tmp0.xyz)%s);\n", sample_function.name, reg, dst_mask);
2601
2602     current_state->current_row = 0;
2603 }
2604
2605 /** Process the WINED3DSIO_TEXM3X3 instruction in GLSL
2606  * Perform the 3rd row of a 3x3 matrix multiply */
2607 static void pshader_glsl_texm3x3(const SHADER_OPCODE_ARG *arg)
2608 {
2609     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2610     glsl_src_param_t src0_param;
2611     char dst_mask[6];
2612     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2613     IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
2614     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
2615
2616     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2617
2618     shader_glsl_append_dst(arg->buffer, arg);
2619     shader_glsl_get_write_mask(arg->dst, dst_mask);
2620     shader_addline(arg->buffer, "vec4(tmp0.xy, dot(T%u.xyz, %s), 1.0)%s);\n", reg, src0_param.param_str, dst_mask);
2621
2622     current_state->current_row = 0;
2623 }
2624
2625 /** Process the WINED3DSIO_TEXM3X3SPEC instruction in GLSL 
2626  * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
2627 static void pshader_glsl_texm3x3spec(const SHADER_OPCODE_ARG *arg)
2628 {
2629     IWineD3DPixelShaderImpl* shader = (IWineD3DPixelShaderImpl*) arg->shader;
2630     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2631     glsl_src_param_t src0_param;
2632     glsl_src_param_t src1_param;
2633     char dst_mask[6];
2634     SHADER_BUFFER* buffer = arg->buffer;
2635     SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
2636     DWORD stype = arg->reg_maps->samplers[reg] & WINED3DSP_TEXTURETYPE_MASK;
2637     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2638     glsl_sample_function_t sample_function;
2639
2640     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2641     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], src_mask, &src1_param);
2642
2643     /* Perform the last matrix multiply operation */
2644     shader_addline(buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
2645     /* Reflection calculation */
2646     shader_addline(buffer, "tmp0.xyz = -reflect((%s), normalize(tmp0.xyz));\n", src1_param.param_str);
2647
2648     shader_glsl_append_dst(buffer, arg);
2649     shader_glsl_get_write_mask(arg->dst, dst_mask);
2650     /* Dependent read, not valid with conditional NP2 */
2651     shader_glsl_get_sample_function(stype, FALSE, FALSE, &sample_function);
2652
2653     /* Sample the texture */
2654     shader_addline(buffer, "%s(Psampler%u, tmp0.xyz)%s);\n", sample_function.name, reg, dst_mask);
2655
2656     current_state->current_row = 0;
2657 }
2658
2659 /** Process the WINED3DSIO_TEXM3X3VSPEC instruction in GLSL 
2660  * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
2661 static void pshader_glsl_texm3x3vspec(const SHADER_OPCODE_ARG *arg)
2662 {
2663     IWineD3DPixelShaderImpl* shader = (IWineD3DPixelShaderImpl*) arg->shader;
2664     DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
2665     SHADER_BUFFER* buffer = arg->buffer;
2666     SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
2667     glsl_src_param_t src0_param;
2668     char dst_mask[6];
2669     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2670     DWORD sampler_type = arg->reg_maps->samplers[reg] & WINED3DSP_TEXTURETYPE_MASK;
2671     glsl_sample_function_t sample_function;
2672
2673     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
2674
2675     /* Perform the last matrix multiply operation */
2676     shader_addline(buffer, "tmp0.z = dot(vec3(T%u), vec3(%s));\n", reg, src0_param.param_str);
2677
2678     /* Construct the eye-ray vector from w coordinates */
2679     shader_addline(buffer, "tmp1.xyz = normalize(vec3(gl_TexCoord[%u].w, gl_TexCoord[%u].w, gl_TexCoord[%u].w));\n",
2680             current_state->texcoord_w[0], current_state->texcoord_w[1], reg);
2681     shader_addline(buffer, "tmp0.xyz = -reflect(tmp1.xyz, normalize(tmp0.xyz));\n");
2682
2683     shader_glsl_append_dst(buffer, arg);
2684     shader_glsl_get_write_mask(arg->dst, dst_mask);
2685     /* Dependent read, not valid with conditional NP2 */
2686     shader_glsl_get_sample_function(sampler_type, FALSE, FALSE, &sample_function);
2687
2688     /* Sample the texture using the calculated coordinates */
2689     shader_addline(buffer, "%s(Psampler%u, tmp0.xyz)%s);\n", sample_function.name, reg, dst_mask);
2690
2691     current_state->current_row = 0;
2692 }
2693
2694 /** Process the WINED3DSIO_TEXBEM instruction in GLSL.
2695  * Apply a fake bump map transform.
2696  * texbem is pshader <= 1.3 only, this saves a few version checks
2697  */
2698 static void pshader_glsl_texbem(const SHADER_OPCODE_ARG *arg)
2699 {
2700     IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
2701     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
2702     char dst_swizzle[6];
2703     glsl_sample_function_t sample_function;
2704     glsl_src_param_t coord_param;
2705     DWORD sampler_type;
2706     DWORD sampler_idx;
2707     DWORD mask;
2708     DWORD flags;
2709     char coord_mask[6];
2710
2711     sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2712     flags = deviceImpl->stateBlock->textureState[sampler_idx][WINED3DTSS_TEXTURETRANSFORMFLAGS];
2713
2714     sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
2715     /* Dependent read, not valid with conditional NP2 */
2716     shader_glsl_get_sample_function(sampler_type, FALSE, FALSE, &sample_function);
2717     mask = sample_function.coord_mask;
2718
2719     shader_glsl_get_write_mask(arg->dst, dst_swizzle);
2720
2721     shader_glsl_get_write_mask(mask, coord_mask);
2722
2723     /* with projective textures, texbem only divides the static texture coord, not the displacement,
2724          * so we can't let the GL handle this.
2725          */
2726     if (flags & WINED3DTTFF_PROJECTED) {
2727         DWORD div_mask=0;
2728         char coord_div_mask[3];
2729         switch (flags & ~WINED3DTTFF_PROJECTED) {
2730             case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
2731             case WINED3DTTFF_COUNT2: div_mask = WINED3DSP_WRITEMASK_1; break;
2732             case WINED3DTTFF_COUNT3: div_mask = WINED3DSP_WRITEMASK_2; break;
2733             case WINED3DTTFF_COUNT4:
2734             case WINED3DTTFF_DISABLE: div_mask = WINED3DSP_WRITEMASK_3; break;
2735         }
2736         shader_glsl_get_write_mask(div_mask, coord_div_mask);
2737         shader_addline(arg->buffer, "T%u%s /= T%u%s;\n", sampler_idx, coord_mask, sampler_idx, coord_div_mask);
2738     }
2739
2740     shader_glsl_append_dst(arg->buffer, arg);
2741     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0|WINED3DSP_WRITEMASK_1, &coord_param);
2742     if(arg->opcode->opcode == WINED3DSIO_TEXBEML) {
2743         glsl_src_param_t luminance_param;
2744         shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_2, &luminance_param);
2745         shader_addline(arg->buffer, "(%s(Psampler%u, T%u%s + vec4(bumpenvmat%d * %s, 0.0, 0.0)%s )*(%s * luminancescale%d + luminanceoffset%d))%s);\n",
2746                        sample_function.name, sampler_idx, sampler_idx, coord_mask, sampler_idx, coord_param.param_str, coord_mask,
2747                        luminance_param.param_str, sampler_idx, sampler_idx, dst_swizzle);
2748     } else {
2749         shader_addline(arg->buffer, "%s(Psampler%u, T%u%s + vec4(bumpenvmat%d * %s, 0.0, 0.0)%s )%s);\n",
2750                        sample_function.name, sampler_idx, sampler_idx, coord_mask, sampler_idx, coord_param.param_str, coord_mask, dst_swizzle);
2751     }
2752 }
2753
2754 static void pshader_glsl_bem(const SHADER_OPCODE_ARG *arg)
2755 {
2756     glsl_src_param_t src0_param, src1_param;
2757     DWORD sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2758
2759     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0|WINED3DSP_WRITEMASK_1, &src0_param);
2760     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0|WINED3DSP_WRITEMASK_1, &src1_param);
2761
2762     shader_glsl_append_dst(arg->buffer, arg);
2763     shader_addline(arg->buffer, "%s + bumpenvmat%d * %s);\n",
2764                    src0_param.param_str, sampler_idx, src1_param.param_str);
2765 }
2766
2767 /** Process the WINED3DSIO_TEXREG2AR instruction in GLSL
2768  * Sample 2D texture at dst using the alpha & red (wx) components of src as texture coordinates */
2769 static void pshader_glsl_texreg2ar(const SHADER_OPCODE_ARG *arg)
2770 {
2771     glsl_src_param_t src0_param;
2772     DWORD sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2773     char dst_mask[6];
2774
2775     shader_glsl_append_dst(arg->buffer, arg);
2776     shader_glsl_get_write_mask(arg->dst, dst_mask);
2777     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
2778
2779     shader_addline(arg->buffer, "texture2D(Psampler%u, %s.wx)%s);\n", sampler_idx, src0_param.reg_name, dst_mask);
2780 }
2781
2782 /** Process the WINED3DSIO_TEXREG2GB instruction in GLSL
2783  * Sample 2D texture at dst using the green & blue (yz) components of src as texture coordinates */
2784 static void pshader_glsl_texreg2gb(const SHADER_OPCODE_ARG *arg)
2785 {
2786     glsl_src_param_t src0_param;
2787     DWORD sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2788     char dst_mask[6];
2789
2790     shader_glsl_append_dst(arg->buffer, arg);
2791     shader_glsl_get_write_mask(arg->dst, dst_mask);
2792     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
2793
2794     shader_addline(arg->buffer, "texture2D(Psampler%u, %s.yz)%s);\n", sampler_idx, src0_param.reg_name, dst_mask);
2795 }
2796
2797 /** Process the WINED3DSIO_TEXREG2RGB instruction in GLSL
2798  * Sample texture at dst using the rgb (xyz) components of src as texture coordinates */
2799 static void pshader_glsl_texreg2rgb(const SHADER_OPCODE_ARG *arg)
2800 {
2801     glsl_src_param_t src0_param;
2802     char dst_mask[6];
2803     DWORD sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
2804     DWORD sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
2805     glsl_sample_function_t sample_function;
2806
2807     shader_glsl_append_dst(arg->buffer, arg);
2808     shader_glsl_get_write_mask(arg->dst, dst_mask);
2809     /* Dependent read, not valid with conditional NP2 */
2810     shader_glsl_get_sample_function(sampler_type, FALSE, FALSE, &sample_function);
2811     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], sample_function.coord_mask, &src0_param);
2812
2813     shader_addline(arg->buffer, "%s(Psampler%u, %s)%s);\n", sample_function.name, sampler_idx, src0_param.param_str, dst_mask);
2814 }
2815
2816 /** Process the WINED3DSIO_TEXKILL instruction in GLSL.
2817  * If any of the first 3 components are < 0, discard this pixel */
2818 static void pshader_glsl_texkill(const SHADER_OPCODE_ARG *arg)
2819 {
2820     glsl_dst_param_t dst_param;
2821
2822     /* The argument is a destination parameter, and no writemasks are allowed */
2823     shader_glsl_add_dst_param(arg, arg->dst, 0, &dst_param);
2824     if ((arg->reg_maps->shader_version >= WINED3DPS_VERSION(2,0)))
2825     {
2826         /* 2.0 shaders compare all 4 components in texkill */
2827         shader_addline(arg->buffer, "if (any(lessThan(%s.xyzw, vec4(0.0)))) discard;\n", dst_param.reg_name);
2828     } else {
2829         /* 1.X shaders only compare the first 3 components, probably due to the nature of the texkill
2830          * instruction as a tex* instruction, and phase, which kills all a / w components. Even if all
2831          * 4 components are defined, only the first 3 are used
2832          */
2833         shader_addline(arg->buffer, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;\n", dst_param.reg_name);
2834     }
2835 }
2836
2837 /** Process the WINED3DSIO_DP2ADD instruction in GLSL.
2838  * dst = dot2(src0, src1) + src2 */
2839 static void pshader_glsl_dp2add(const SHADER_OPCODE_ARG *arg)
2840 {
2841     glsl_src_param_t src0_param;
2842     glsl_src_param_t src1_param;
2843     glsl_src_param_t src2_param;
2844     DWORD write_mask;
2845     unsigned int mask_size;
2846
2847     write_mask = shader_glsl_append_dst(arg->buffer, arg);
2848     mask_size = shader_glsl_get_write_mask_size(write_mask);
2849
2850     shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
2851     shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
2852     shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], WINED3DSP_WRITEMASK_0, &src2_param);
2853
2854     if (mask_size > 1) {
2855         shader_addline(arg->buffer, "vec%d(dot(%s, %s) + %s));\n", mask_size, src0_param.param_str, src1_param.param_str, src2_param.param_str);
2856     } else {
2857         shader_addline(arg->buffer, "dot(%s, %s) + %s);\n", src0_param.param_str, src1_param.param_str, src2_param.param_str);
2858     }
2859 }
2860
2861 static void pshader_glsl_input_pack(SHADER_BUFFER* buffer, const struct semantic* semantics_in,
2862         IWineD3DPixelShader *iface, enum vertexprocessing_mode vertexprocessing)
2863 {
2864    unsigned int i;
2865    IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *) iface;
2866
2867    for (i = 0; i < MAX_REG_INPUT; i++) {
2868
2869        DWORD usage_token = semantics_in[i].usage;
2870        DWORD register_token = semantics_in[i].reg;
2871        DWORD usage, usage_idx;
2872        char reg_mask[6];
2873
2874        /* Uninitialized */
2875        if (!usage_token) continue;
2876        usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
2877        usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
2878        shader_glsl_get_write_mask(register_token, reg_mask);
2879
2880        switch(usage) {
2881
2882            case WINED3DDECLUSAGE_TEXCOORD:
2883                if(usage_idx < 8 && vertexprocessing == pretransformed) {
2884                    shader_addline(buffer, "IN[%u]%s = gl_TexCoord[%u]%s;\n",
2885                                   This->input_reg_map[i], reg_mask, usage_idx, reg_mask);
2886                } else {
2887                    shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
2888                                   This->input_reg_map[i], reg_mask, reg_mask);
2889                }
2890                break;
2891
2892            case WINED3DDECLUSAGE_COLOR:
2893                if (usage_idx == 0)
2894                    shader_addline(buffer, "IN[%u]%s = vec4(gl_Color)%s;\n",
2895                        This->input_reg_map[i], reg_mask, reg_mask);
2896                else if (usage_idx == 1)
2897                    shader_addline(buffer, "IN[%u]%s = vec4(gl_SecondaryColor)%s;\n",
2898                        This->input_reg_map[i], reg_mask, reg_mask);
2899                else
2900                    shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
2901                        This->input_reg_map[i], reg_mask, reg_mask);
2902                break;
2903
2904            default:
2905                shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
2906                    This->input_reg_map[i], reg_mask, reg_mask);
2907         }
2908     }
2909 }
2910
2911 /*********************************************
2912  * Vertex Shader Specific Code begins here
2913  ********************************************/
2914
2915 static void add_glsl_program_entry(struct shader_glsl_priv *priv, struct glsl_shader_prog_link *entry) {
2916     glsl_program_key_t *key;
2917
2918     key = HeapAlloc(GetProcessHeap(), 0, sizeof(glsl_program_key_t));
2919     key->vshader = entry->vshader;
2920     key->pshader = entry->pshader;
2921     key->ps_args = entry->ps_args;
2922
2923     hash_table_put(priv->glsl_program_lookup, key, entry);
2924 }
2925
2926 static struct glsl_shader_prog_link *get_glsl_program_entry(struct shader_glsl_priv *priv,
2927         GLhandleARB vshader, IWineD3DPixelShader *pshader, struct ps_compile_args *ps_args) {
2928     glsl_program_key_t key;
2929
2930     key.vshader = vshader;
2931     key.pshader = pshader;
2932     key.ps_args = *ps_args;
2933
2934     return (struct glsl_shader_prog_link *)hash_table_get(priv->glsl_program_lookup, &key);
2935 }
2936
2937 static void delete_glsl_program_entry(struct shader_glsl_priv *priv, const WineD3D_GL_Info *gl_info,
2938         struct glsl_shader_prog_link *entry)
2939 {
2940     glsl_program_key_t *key;
2941
2942     key = HeapAlloc(GetProcessHeap(), 0, sizeof(glsl_program_key_t));
2943     key->vshader = entry->vshader;
2944     key->pshader = entry->pshader;
2945     key->ps_args = entry->ps_args;
2946     hash_table_remove(priv->glsl_program_lookup, key);
2947
2948     GL_EXTCALL(glDeleteObjectARB(entry->programId));
2949     if (entry->vshader) list_remove(&entry->vshader_entry);
2950     if (entry->pshader) list_remove(&entry->pshader_entry);
2951     HeapFree(GetProcessHeap(), 0, entry->vuniformF_locations);
2952     HeapFree(GetProcessHeap(), 0, entry->puniformF_locations);
2953     HeapFree(GetProcessHeap(), 0, entry);
2954 }
2955
2956 static void handle_ps3_input(SHADER_BUFFER *buffer, const struct semantic *semantics_in,
2957         const struct semantic *semantics_out, const WineD3D_GL_Info *gl_info, const DWORD *map)
2958 {
2959     unsigned int i, j;
2960     DWORD usage_token, usage_token_out;
2961     DWORD register_token, register_token_out;
2962     DWORD usage, usage_idx, usage_out, usage_idx_out;
2963     DWORD *set;
2964     DWORD in_idx;
2965     DWORD in_count = GL_LIMITS(glsl_varyings) / 4;
2966     char reg_mask[6], reg_mask_out[6];
2967     char destination[50];
2968
2969     set = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*set) * (in_count + 2));
2970
2971     if (!semantics_out) {
2972         /* Save gl_FrontColor & gl_FrontSecondaryColor before overwriting them. */
2973         shader_addline(buffer, "vec4 front_color = gl_FrontColor;\n");
2974         shader_addline(buffer, "vec4 front_secondary_color = gl_FrontSecondaryColor;\n");
2975     }
2976
2977     for(i = 0; i < MAX_REG_INPUT; i++) {
2978         usage_token = semantics_in[i].usage;
2979         if (!usage_token) continue;
2980
2981         in_idx = map[i];
2982         if (in_idx >= (in_count + 2)) {
2983             FIXME("More input varyings declared than supported, expect issues\n");
2984             continue;
2985         } else if(map[i] == -1) {
2986             /* Declared, but not read register */
2987             continue;
2988         }
2989
2990         if (in_idx == in_count) {
2991             sprintf(destination, "gl_FrontColor");
2992         } else if (in_idx == in_count + 1) {
2993             sprintf(destination, "gl_FrontSecondaryColor");
2994         } else {
2995             sprintf(destination, "IN[%u]", in_idx);
2996         }
2997
2998         register_token = semantics_in[i].reg;
2999
3000         usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
3001         usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
3002         set[map[i]] = shader_glsl_get_write_mask(register_token, reg_mask);
3003
3004         if(!semantics_out) {
3005             switch(usage) {
3006                 case WINED3DDECLUSAGE_COLOR:
3007                     if (usage_idx == 0)
3008                         shader_addline(buffer, "%s%s = front_color%s;\n",
3009                                        destination, reg_mask, reg_mask);
3010                     else if (usage_idx == 1)
3011                         shader_addline(buffer, "%s%s = front_secondary_color%s;\n",
3012                                        destination, reg_mask, reg_mask);
3013                     else
3014                         shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3015                                        destination, reg_mask, reg_mask);
3016                     break;
3017
3018                 case WINED3DDECLUSAGE_TEXCOORD:
3019                     if (usage_idx < 8) {
3020                         shader_addline(buffer, "%s%s = gl_TexCoord[%u]%s;\n",
3021                                        destination, reg_mask, usage_idx, reg_mask);
3022                     } else {
3023                         shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3024                                        destination, reg_mask, reg_mask);
3025                     }
3026                     break;
3027
3028                 case WINED3DDECLUSAGE_FOG:
3029                     shader_addline(buffer, "%s%s = vec4(gl_FogFragCoord, 0.0, 0.0, 0.0)%s;\n",
3030                                    destination, reg_mask, reg_mask);
3031                     break;
3032
3033                 default:
3034                     shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3035                                    destination, reg_mask, reg_mask);
3036             }
3037         } else {
3038             BOOL found = FALSE;
3039             for(j = 0; j < MAX_REG_OUTPUT; j++) {
3040                 usage_token_out = semantics_out[j].usage;
3041                 if (!usage_token_out) continue;
3042                 register_token_out = semantics_out[j].reg;
3043
3044                 usage_out = (usage_token_out & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
3045                 usage_idx_out = (usage_token_out & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
3046                 shader_glsl_get_write_mask(register_token_out, reg_mask_out);
3047
3048                 if(usage == usage_out &&
3049                    usage_idx == usage_idx_out) {
3050                     shader_addline(buffer, "%s%s = OUT[%u]%s;\n",
3051                                    destination, reg_mask, j, reg_mask);
3052                     found = TRUE;
3053                 }
3054             }
3055             if(!found) {
3056                 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3057                                destination, reg_mask, reg_mask);
3058             }
3059         }
3060     }
3061
3062     /* This is solely to make the compiler / linker happy and avoid warning about undefined
3063      * varyings. It shouldn't result in any real code executed on the GPU, since all read
3064      * input varyings are assigned above, if the optimizer works properly.
3065      */
3066     for(i = 0; i < in_count + 2; i++) {
3067         if(set[i] != WINED3DSP_WRITEMASK_ALL) {
3068             unsigned int size = 0;
3069             memset(reg_mask, 0, sizeof(reg_mask));
3070             if(!(set[i] & WINED3DSP_WRITEMASK_0)) {
3071                 reg_mask[size] = 'x';
3072                 size++;
3073             }
3074             if(!(set[i] & WINED3DSP_WRITEMASK_1)) {
3075                 reg_mask[size] = 'y';
3076                 size++;
3077             }
3078             if(!(set[i] & WINED3DSP_WRITEMASK_2)) {
3079                 reg_mask[size] = 'z';
3080                 size++;
3081             }
3082             if(!(set[i] & WINED3DSP_WRITEMASK_3)) {
3083                 reg_mask[size] = 'w';
3084                 size++;
3085             }
3086
3087             if (i == in_count) {
3088                 sprintf(destination, "gl_FrontColor");
3089             } else if (i == in_count + 1) {
3090                 sprintf(destination, "gl_FrontSecondaryColor");
3091             } else {
3092                 sprintf(destination, "IN[%u]", i);
3093             }
3094
3095             if (size == 1) {
3096                 shader_addline(buffer, "%s.%s = 0.0;\n", destination, reg_mask);
3097             } else {
3098                 shader_addline(buffer, "%s.%s = vec%u(0.0);\n", destination, reg_mask, size);
3099             }
3100         }
3101     }
3102
3103     HeapFree(GetProcessHeap(), 0, set);
3104 }
3105
3106 static GLhandleARB generate_param_reorder_function(IWineD3DVertexShader *vertexshader,
3107         IWineD3DPixelShader *pixelshader, const WineD3D_GL_Info *gl_info)
3108 {
3109     GLhandleARB ret = 0;
3110     IWineD3DVertexShaderImpl *vs = (IWineD3DVertexShaderImpl *) vertexshader;
3111     IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) pixelshader;
3112     IWineD3DDeviceImpl *device;
3113     DWORD vs_major = WINED3DSHADER_VERSION_MAJOR(vs->baseShader.reg_maps.shader_version);
3114     DWORD ps_major = ps ? WINED3DSHADER_VERSION_MAJOR(ps->baseShader.reg_maps.shader_version) : 0;
3115     unsigned int i;
3116     SHADER_BUFFER buffer;
3117     DWORD usage_token;
3118     DWORD register_token;
3119     DWORD usage, usage_idx, writemask;
3120     char reg_mask[6];
3121     const struct semantic *semantics_out, *semantics_in;
3122
3123     shader_buffer_init(&buffer);
3124
3125     shader_addline(&buffer, "#version 120\n");
3126
3127     if(vs_major < 3 && ps_major < 3) {
3128         /* That one is easy: The vertex shader writes to the builtin varyings, the pixel shader reads from them.
3129          * Take care about the texcoord .w fixup though if we're using the fixed function fragment pipeline
3130          */
3131         device = (IWineD3DDeviceImpl *) vs->baseShader.device;
3132         if((GLINFO_LOCATION).set_texcoord_w && ps_major == 0 && vs_major > 0 &&
3133             !device->frag_pipe->ffp_proj_control) {
3134             shader_addline(&buffer, "void order_ps_input() {\n");
3135             for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) {
3136                 if(vs->baseShader.reg_maps.texcoord_mask[i] != 0 &&
3137                    vs->baseShader.reg_maps.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) {
3138                     shader_addline(&buffer, "gl_TexCoord[%u].w = 1.0;\n", i);
3139                 }
3140             }
3141             shader_addline(&buffer, "}\n");
3142         } else {
3143             shader_addline(&buffer, "void order_ps_input() { /* do nothing */ }\n");
3144         }
3145     } else if(ps_major < 3 && vs_major >= 3) {
3146         /* The vertex shader writes to its own varyings, the pixel shader needs them in the builtin ones */
3147         semantics_out = vs->semantics_out;
3148
3149         shader_addline(&buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
3150         for(i = 0; i < MAX_REG_OUTPUT; i++) {
3151             usage_token = semantics_out[i].usage;
3152             if (!usage_token) continue;
3153             register_token = semantics_out[i].reg;
3154
3155             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
3156             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
3157             writemask = shader_glsl_get_write_mask(register_token, reg_mask);
3158
3159             switch(usage) {
3160                 case WINED3DDECLUSAGE_COLOR:
3161                     if (usage_idx == 0)
3162                         shader_addline(&buffer, "gl_FrontColor%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
3163                     else if (usage_idx == 1)
3164                         shader_addline(&buffer, "gl_FrontSecondaryColor%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
3165                     break;
3166
3167                 case WINED3DDECLUSAGE_POSITION:
3168                     shader_addline(&buffer, "gl_Position%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
3169                     break;
3170
3171                 case WINED3DDECLUSAGE_TEXCOORD:
3172                     if (usage_idx < 8) {
3173                         if(!(GLINFO_LOCATION).set_texcoord_w || ps_major > 0) writemask |= WINED3DSP_WRITEMASK_3;
3174
3175                         shader_addline(&buffer, "gl_TexCoord[%u]%s = OUT[%u]%s;\n",
3176                                         usage_idx, reg_mask, i, reg_mask);
3177                         if(!(writemask & WINED3DSP_WRITEMASK_3)) {
3178                             shader_addline(&buffer, "gl_TexCoord[%u].w = 1.0;\n", usage_idx);
3179                         }
3180                     }
3181                     break;
3182
3183                 case WINED3DDECLUSAGE_PSIZE:
3184                     shader_addline(&buffer, "gl_PointSize = OUT[%u].x;\n", i);
3185                     break;
3186
3187                 case WINED3DDECLUSAGE_FOG:
3188                     shader_addline(&buffer, "gl_FogFragCoord = OUT[%u].%c;\n", i, reg_mask[1]);
3189                     break;
3190
3191                 default:
3192                     break;
3193             }
3194         }
3195         shader_addline(&buffer, "}\n");
3196
3197     } else if(ps_major >= 3 && vs_major >= 3) {
3198         semantics_out = vs->semantics_out;
3199         semantics_in = ps->semantics_in;
3200
3201         /* This one is tricky: a 3.0 pixel shader reads from a 3.0 vertex shader */
3202         shader_addline(&buffer, "varying vec4 IN[%u];\n", GL_LIMITS(glsl_varyings) / 4);
3203         shader_addline(&buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
3204
3205         /* First, sort out position and point size. Those are not passed to the pixel shader */
3206         for(i = 0; i < MAX_REG_OUTPUT; i++) {
3207             usage_token = semantics_out[i].usage;
3208             if (!usage_token) continue;
3209             register_token = semantics_out[i].reg;
3210
3211             usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
3212             usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
3213             shader_glsl_get_write_mask(register_token, reg_mask);
3214
3215             switch(usage) {
3216                 case WINED3DDECLUSAGE_POSITION:
3217                     shader_addline(&buffer, "gl_Position%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
3218                     break;
3219
3220                 case WINED3DDECLUSAGE_PSIZE:
3221                     shader_addline(&buffer, "gl_PointSize = OUT[%u].x;\n", i);
3222                     break;
3223
3224                 default:
3225                     break;
3226             }
3227         }
3228
3229         /* Then, fix the pixel shader input */
3230         handle_ps3_input(&buffer, semantics_in, semantics_out, gl_info, ps->input_reg_map);
3231
3232         shader_addline(&buffer, "}\n");
3233     } else if(ps_major >= 3 && vs_major < 3) {
3234         semantics_in = ps->semantics_in;
3235
3236         shader_addline(&buffer, "varying vec4 IN[%u];\n", GL_LIMITS(glsl_varyings) / 4);
3237         shader_addline(&buffer, "void order_ps_input() {\n");
3238         /* The vertex shader wrote to the builtin varyings. There is no need to figure out position and
3239          * point size, but we depend on the optimizers kindness to find out that the pixel shader doesn't
3240          * read gl_TexCoord and gl_ColorX, otherwise we'll run out of varyings
3241          */
3242         handle_ps3_input(&buffer, semantics_in, NULL, gl_info, ps->input_reg_map);
3243         shader_addline(&buffer, "}\n");
3244     } else {
3245         ERR("Unexpected vertex and pixel shader version condition: vs: %d, ps: %d\n", vs_major, ps_major);
3246     }
3247
3248     ret = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
3249     checkGLcall("glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)");
3250     GL_EXTCALL(glShaderSourceARB(ret, 1, (const char**)&buffer.buffer, NULL));
3251     checkGLcall("glShaderSourceARB(ret, 1, (const char**)&buffer.buffer, NULL)");
3252     GL_EXTCALL(glCompileShaderARB(ret));
3253     checkGLcall("glCompileShaderARB(ret)");
3254
3255     shader_buffer_free(&buffer);
3256     return ret;
3257 }
3258
3259 static void hardcode_local_constants(IWineD3DBaseShaderImpl *shader, const WineD3D_GL_Info *gl_info,
3260         GLhandleARB programId, char prefix)
3261 {
3262     const local_constant *lconst;
3263     GLuint tmp_loc;
3264     const float *value;
3265     char glsl_name[8];
3266
3267     LIST_FOR_EACH_ENTRY(lconst, &shader->baseShader.constantsF, local_constant, entry) {
3268         value = (const float *)lconst->value;
3269         snprintf(glsl_name, sizeof(glsl_name), "%cLC%u", prefix, lconst->idx);
3270         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
3271         GL_EXTCALL(glUniform4fvARB(tmp_loc, 1, value));
3272     }
3273     checkGLcall("Hardcoding local constants\n");
3274 }
3275
3276 /** Sets the GLSL program ID for the given pixel and vertex shader combination.
3277  * It sets the programId on the current StateBlock (because it should be called
3278  * inside of the DrawPrimitive() part of the render loop).
3279  *
3280  * If a program for the given combination does not exist, create one, and store
3281  * the program in the hash table.  If it creates a program, it will link the
3282  * given objects, too.
3283  */
3284 static void set_glsl_shader_program(IWineD3DDevice *iface, BOOL use_ps, BOOL use_vs) {
3285     IWineD3DDeviceImpl *This               = (IWineD3DDeviceImpl *)iface;
3286     struct shader_glsl_priv *priv          = (struct shader_glsl_priv *)This->shader_priv;
3287     const WineD3D_GL_Info *gl_info         = &This->adapter->gl_info;
3288     IWineD3DPixelShader  *pshader          = This->stateBlock->pixelShader;
3289     IWineD3DVertexShader *vshader          = This->stateBlock->vertexShader;
3290     struct glsl_shader_prog_link *entry    = NULL;
3291     GLhandleARB programId                  = 0;
3292     GLhandleARB reorder_shader_id          = 0;
3293     int i;
3294     char glsl_name[8];
3295     GLhandleARB vshader_id, pshader_id;
3296     struct ps_compile_args compile_args;
3297
3298     if(use_vs) {
3299         IWineD3DVertexShaderImpl_CompileShader(vshader);
3300         vshader_id = ((IWineD3DVertexShaderImpl*)vshader)->prgId;
3301     } else {
3302         vshader_id = 0;
3303     }
3304     if(use_ps) {
3305         find_ps_compile_args((IWineD3DPixelShaderImpl*)This->stateBlock->pixelShader, This->stateBlock, &compile_args);
3306     } else {
3307         /* FIXME: Do we really have to spend CPU cycles to generate a few zeroed bytes? */
3308         memset(&compile_args, 0, sizeof(compile_args));
3309     }
3310     entry = get_glsl_program_entry(priv, vshader_id, pshader, &compile_args);
3311     if (entry) {
3312         priv->glsl_program = entry;
3313         return;
3314     }
3315
3316     /* If we get to this point, then no matching program exists, so we create one */
3317     programId = GL_EXTCALL(glCreateProgramObjectARB());
3318     TRACE("Created new GLSL shader program %u\n", programId);
3319
3320     /* Create the entry */
3321     entry = HeapAlloc(GetProcessHeap(), 0, sizeof(struct glsl_shader_prog_link));
3322     entry->programId = programId;
3323     entry->vshader = vshader_id;
3324     entry->pshader = pshader;
3325     entry->ps_args = compile_args;
3326     entry->constant_version = 0;
3327     /* Add the hash table entry */
3328     add_glsl_program_entry(priv, entry);
3329
3330     /* Set the current program */
3331     priv->glsl_program = entry;
3332
3333     /* Attach GLSL vshader */
3334     if (vshader_id) {
3335         int max_attribs = 16;   /* TODO: Will this always be the case? It is at the moment... */
3336         char tmp_name[10];
3337
3338         reorder_shader_id = generate_param_reorder_function(vshader, pshader, gl_info);
3339         TRACE("Attaching GLSL shader object %u to program %u\n", reorder_shader_id, programId);
3340         GL_EXTCALL(glAttachObjectARB(programId, reorder_shader_id));
3341         checkGLcall("glAttachObjectARB");
3342         /* Flag the reorder function for deletion, then it will be freed automatically when the program
3343          * is destroyed
3344          */
3345         GL_EXTCALL(glDeleteObjectARB(reorder_shader_id));
3346
3347         TRACE("Attaching GLSL shader object %u to program %u\n", vshader_id, programId);
3348         GL_EXTCALL(glAttachObjectARB(programId, vshader_id));
3349         checkGLcall("glAttachObjectARB");
3350
3351         /* Bind vertex attributes to a corresponding index number to match
3352          * the same index numbers as ARB_vertex_programs (makes loading
3353          * vertex attributes simpler).  With this method, we can use the
3354          * exact same code to load the attributes later for both ARB and
3355          * GLSL shaders.
3356          *
3357          * We have to do this here because we need to know the Program ID
3358          * in order to make the bindings work, and it has to be done prior
3359          * to linking the GLSL program. */
3360         for (i = 0; i < max_attribs; ++i) {
3361             if (((IWineD3DBaseShaderImpl*)vshader)->baseShader.reg_maps.attributes[i]) {
3362                 snprintf(tmp_name, sizeof(tmp_name), "attrib%i", i);
3363                 GL_EXTCALL(glBindAttribLocationARB(programId, i, tmp_name));
3364             }
3365         }
3366         checkGLcall("glBindAttribLocationARB");
3367
3368         list_add_head(&((IWineD3DBaseShaderImpl *)vshader)->baseShader.linked_programs, &entry->vshader_entry);
3369     }
3370
3371     if(use_ps) {
3372         pshader_id = find_gl_pshader((IWineD3DPixelShaderImpl *) pshader, &compile_args);
3373     } else {
3374         pshader_id = 0;
3375     }
3376
3377     /* Attach GLSL pshader */
3378     if (pshader_id) {
3379         TRACE("Attaching GLSL shader object %u to program %u\n", pshader_id, programId);
3380         GL_EXTCALL(glAttachObjectARB(programId, pshader_id));
3381         checkGLcall("glAttachObjectARB");
3382
3383         list_add_head(&((IWineD3DBaseShaderImpl *)pshader)->baseShader.linked_programs, &entry->pshader_entry);
3384     }
3385
3386     /* Link the program */
3387     TRACE("Linking GLSL shader program %u\n", programId);
3388     GL_EXTCALL(glLinkProgramARB(programId));
3389     print_glsl_info_log(&GLINFO_LOCATION, programId);
3390
3391     entry->vuniformF_locations = HeapAlloc(GetProcessHeap(), 0, sizeof(GLhandleARB) * GL_LIMITS(vshader_constantsF));
3392     for (i = 0; i < GL_LIMITS(vshader_constantsF); ++i) {
3393         snprintf(glsl_name, sizeof(glsl_name), "VC[%i]", i);
3394         entry->vuniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
3395     }
3396     for (i = 0; i < MAX_CONST_I; ++i) {
3397         snprintf(glsl_name, sizeof(glsl_name), "VI[%i]", i);
3398         entry->vuniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
3399     }
3400     entry->puniformF_locations = HeapAlloc(GetProcessHeap(), 0, sizeof(GLhandleARB) * GL_LIMITS(pshader_constantsF));
3401     for (i = 0; i < GL_LIMITS(pshader_constantsF); ++i) {
3402         snprintf(glsl_name, sizeof(glsl_name), "PC[%i]", i);
3403         entry->puniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
3404     }
3405     for (i = 0; i < MAX_CONST_I; ++i) {
3406         snprintf(glsl_name, sizeof(glsl_name), "PI[%i]", i);
3407         entry->puniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
3408     }
3409
3410     if(pshader) {
3411         for(i = 0; i < ((IWineD3DPixelShaderImpl*)pshader)->numbumpenvmatconsts; i++) {
3412             char name[32];
3413             sprintf(name, "bumpenvmat%d", ((IWineD3DPixelShaderImpl*)pshader)->bumpenvmatconst[i].texunit);
3414             entry->bumpenvmat_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
3415             sprintf(name, "luminancescale%d", ((IWineD3DPixelShaderImpl*)pshader)->luminanceconst[i].texunit);
3416             entry->luminancescale_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
3417             sprintf(name, "luminanceoffset%d", ((IWineD3DPixelShaderImpl*)pshader)->luminanceconst[i].texunit);
3418             entry->luminanceoffset_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
3419         }
3420     }
3421
3422
3423     entry->posFixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup"));
3424     entry->ycorrection_location = GL_EXTCALL(glGetUniformLocationARB(programId, "ycorrection"));
3425     checkGLcall("Find glsl program uniform locations");
3426
3427     if (pshader
3428             && WINED3DSHADER_VERSION_MAJOR(((IWineD3DPixelShaderImpl *)pshader)->baseShader.reg_maps.shader_version) >= 3
3429             && ((IWineD3DPixelShaderImpl *)pshader)->declared_in_count > GL_LIMITS(glsl_varyings) / 4)
3430     {
3431         TRACE("Shader %d needs vertex color clamping disabled\n", programId);
3432         entry->vertex_color_clamp = GL_FALSE;
3433     } else {
3434         entry->vertex_color_clamp = GL_FIXED_ONLY_ARB;
3435     }
3436
3437     /* Set the shader to allow uniform loading on it */
3438     GL_EXTCALL(glUseProgramObjectARB(programId));
3439     checkGLcall("glUseProgramObjectARB(programId)");
3440
3441     /* Load the vertex and pixel samplers now. The function that finds the mappings makes sure
3442      * that it stays the same for each vertexshader-pixelshader pair(=linked glsl program). If
3443      * a pshader with fixed function pipeline is used there are no vertex samplers, and if a
3444      * vertex shader with fixed function pixel processing is used we make sure that the card
3445      * supports enough samplers to allow the max number of vertex samplers with all possible
3446      * fixed function fragment processing setups. So once the program is linked these samplers
3447      * won't change.
3448      */
3449     if(vshader_id) {
3450         /* Load vertex shader samplers */
3451         shader_glsl_load_vsamplers(gl_info, (IWineD3DStateBlock*)This->stateBlock, programId);
3452     }
3453     if(pshader_id) {
3454         /* Load pixel shader samplers */
3455         shader_glsl_load_psamplers(gl_info, (IWineD3DStateBlock*)This->stateBlock, programId);
3456     }
3457
3458     /* If the local constants do not have to be loaded with the environment constants,
3459      * load them now to have them hardcoded in the GLSL program. This saves some CPU cycles
3460      * later
3461      */
3462     if(pshader && !((IWineD3DPixelShaderImpl*)pshader)->baseShader.load_local_constsF) {
3463         hardcode_local_constants((IWineD3DBaseShaderImpl *) pshader, gl_info, programId, 'P');
3464     }
3465     if(vshader && !((IWineD3DVertexShaderImpl*)vshader)->baseShader.load_local_constsF) {
3466         hardcode_local_constants((IWineD3DBaseShaderImpl *) vshader, gl_info, programId, 'V');
3467     }
3468 }
3469
3470 static GLhandleARB create_glsl_blt_shader(const WineD3D_GL_Info *gl_info, enum tex_types tex_type)
3471 {
3472     GLhandleARB program_id;
3473     GLhandleARB vshader_id, pshader_id;
3474     static const char *blt_vshader[] =
3475     {
3476         "#version 120\n"
3477         "void main(void)\n"
3478         "{\n"
3479         "    gl_Position = gl_Vertex;\n"
3480         "    gl_FrontColor = vec4(1.0);\n"
3481         "    gl_TexCoord[0] = gl_MultiTexCoord0;\n"
3482         "}\n"
3483     };
3484
3485     static const char *blt_pshaders[tex_type_count] =
3486     {
3487         /* tex_1d */
3488         NULL,
3489         /* tex_2d */
3490         "#version 120\n"
3491         "uniform sampler2D sampler;\n"
3492         "void main(void)\n"
3493         "{\n"
3494         "    gl_FragDepth = texture2D(sampler, gl_TexCoord[0].xy).x;\n"
3495         "}\n",
3496         /* tex_3d */
3497         NULL,
3498         /* tex_cube */
3499         "#version 120\n"
3500         "uniform samplerCube sampler;\n"
3501         "void main(void)\n"
3502         "{\n"
3503         "    gl_FragDepth = textureCube(sampler, gl_TexCoord[0].xyz).x;\n"
3504         "}\n",
3505         /* tex_rect */
3506         "#version 120\n"
3507         "#extension GL_ARB_texture_rectangle : enable\n"
3508         "uniform sampler2DRect sampler;\n"
3509         "void main(void)\n"
3510         "{\n"
3511         "    gl_FragDepth = texture2DRect(sampler, gl_TexCoord[0].xy).x;\n"
3512         "}\n",
3513     };
3514
3515     if (!blt_pshaders[tex_type])
3516     {
3517         FIXME("tex_type %#x not supported\n", tex_type);
3518         tex_type = tex_2d;
3519     }
3520
3521     vshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
3522     GL_EXTCALL(glShaderSourceARB(vshader_id, 1, blt_vshader, NULL));
3523     GL_EXTCALL(glCompileShaderARB(vshader_id));
3524
3525     pshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
3526     GL_EXTCALL(glShaderSourceARB(pshader_id, 1, &blt_pshaders[tex_type], NULL));
3527     GL_EXTCALL(glCompileShaderARB(pshader_id));
3528
3529     program_id = GL_EXTCALL(glCreateProgramObjectARB());
3530     GL_EXTCALL(glAttachObjectARB(program_id, vshader_id));
3531     GL_EXTCALL(glAttachObjectARB(program_id, pshader_id));
3532     GL_EXTCALL(glLinkProgramARB(program_id));
3533
3534     print_glsl_info_log(&GLINFO_LOCATION, program_id);
3535
3536     /* Once linked we can mark the shaders for deletion. They will be deleted once the program
3537      * is destroyed
3538      */
3539     GL_EXTCALL(glDeleteObjectARB(vshader_id));
3540     GL_EXTCALL(glDeleteObjectARB(pshader_id));
3541     return program_id;
3542 }
3543
3544 static void shader_glsl_select(IWineD3DDevice *iface, BOOL usePS, BOOL useVS) {
3545     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
3546     struct shader_glsl_priv *priv = (struct shader_glsl_priv *)This->shader_priv;
3547     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
3548     GLhandleARB program_id = 0;
3549     GLenum old_vertex_color_clamp, current_vertex_color_clamp;
3550
3551     old_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
3552
3553     if (useVS || usePS) set_glsl_shader_program(iface, usePS, useVS);
3554     else priv->glsl_program = NULL;
3555
3556     current_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
3557
3558     if (old_vertex_color_clamp != current_vertex_color_clamp) {
3559         if (GL_SUPPORT(ARB_COLOR_BUFFER_FLOAT)) {
3560             GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, current_vertex_color_clamp));
3561             checkGLcall("glClampColorARB");
3562         } else {
3563             FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
3564         }
3565     }
3566
3567     program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
3568     if (program_id) TRACE("Using GLSL program %u\n", program_id);
3569     GL_EXTCALL(glUseProgramObjectARB(program_id));
3570     checkGLcall("glUseProgramObjectARB");
3571 }
3572
3573 static void shader_glsl_select_depth_blt(IWineD3DDevice *iface, enum tex_types tex_type) {
3574     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
3575     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
3576     struct shader_glsl_priv *priv = (struct shader_glsl_priv *) This->shader_priv;
3577     GLhandleARB *blt_program = &priv->depth_blt_program[tex_type];
3578
3579     if (!*blt_program) {
3580         GLhandleARB loc;
3581         *blt_program = create_glsl_blt_shader(gl_info, tex_type);
3582         loc = GL_EXTCALL(glGetUniformLocationARB(*blt_program, "sampler"));
3583         GL_EXTCALL(glUseProgramObjectARB(*blt_program));
3584         GL_EXTCALL(glUniform1iARB(loc, 0));
3585     } else {
3586         GL_EXTCALL(glUseProgramObjectARB(*blt_program));
3587     }
3588 }
3589
3590 static void shader_glsl_deselect_depth_blt(IWineD3DDevice *iface) {
3591     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
3592     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
3593     struct shader_glsl_priv *priv = (struct shader_glsl_priv *) This->shader_priv;
3594     GLhandleARB program_id;
3595
3596     program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
3597     if (program_id) TRACE("Using GLSL program %u\n", program_id);
3598
3599     GL_EXTCALL(glUseProgramObjectARB(program_id));
3600     checkGLcall("glUseProgramObjectARB");
3601 }
3602
3603 static void shader_glsl_destroy(IWineD3DBaseShader *iface) {
3604     const struct list *linked_programs;
3605     IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *) iface;
3606     IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)This->baseShader.device;
3607     struct shader_glsl_priv *priv = (struct shader_glsl_priv *)device->shader_priv;
3608     const WineD3D_GL_Info *gl_info = &device->adapter->gl_info;
3609     IWineD3DPixelShaderImpl *ps = NULL;
3610     IWineD3DVertexShaderImpl *vs = NULL;
3611
3612     /* Note: Do not use QueryInterface here to find out which shader type this is because this code
3613      * can be called from IWineD3DBaseShader::Release
3614      */
3615     char pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version);
3616
3617     if(pshader) {
3618         ps = (IWineD3DPixelShaderImpl *) This;
3619         if(ps->num_gl_shaders == 0) return;
3620     } else {
3621         vs = (IWineD3DVertexShaderImpl *) This;
3622         if(vs->prgId == 0) return;
3623     }
3624
3625     linked_programs = &This->baseShader.linked_programs;
3626
3627     TRACE("Deleting linked programs\n");
3628     if (linked_programs->next) {
3629         struct glsl_shader_prog_link *entry, *entry2;
3630
3631         if(pshader) {
3632             LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, pshader_entry) {
3633                 delete_glsl_program_entry(priv, gl_info, entry);
3634             }
3635         } else {
3636             LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
3637                 delete_glsl_program_entry(priv, gl_info, entry);
3638             }
3639         }
3640     }
3641
3642     if(pshader) {
3643         UINT i;
3644
3645         ENTER_GL();
3646         for(i = 0; i < ps->num_gl_shaders; i++) {
3647             TRACE("deleting pshader %u\n", ps->gl_shaders[i].prgId);
3648             GL_EXTCALL(glDeleteObjectARB(ps->gl_shaders[i].prgId));
3649             checkGLcall("glDeleteObjectARB");
3650         }
3651         LEAVE_GL();
3652         HeapFree(GetProcessHeap(), 0, ps->gl_shaders);
3653         ps->gl_shaders = NULL;
3654         ps->num_gl_shaders = 0;
3655     } else {
3656         TRACE("Deleting shader object %u\n", vs->prgId);
3657         ENTER_GL();
3658         GL_EXTCALL(glDeleteObjectARB(vs->prgId));
3659         checkGLcall("glDeleteObjectARB");
3660         LEAVE_GL();
3661         vs->prgId = 0;
3662         vs->baseShader.is_compiled = FALSE;
3663     }
3664 }
3665
3666 static unsigned int glsl_program_key_hash(const void *key)
3667 {
3668     const glsl_program_key_t *k = (const glsl_program_key_t *)key;
3669
3670     unsigned int hash = k->vshader | ((DWORD_PTR) k->pshader) << 16;
3671     hash += ~(hash << 15);
3672     hash ^=  (hash >> 10);
3673     hash +=  (hash << 3);
3674     hash ^=  (hash >> 6);
3675     hash += ~(hash << 11);
3676     hash ^=  (hash >> 16);
3677
3678     return hash;
3679 }
3680
3681 static BOOL glsl_program_key_compare(const void *keya, const void *keyb)
3682 {
3683     const glsl_program_key_t *ka = (const glsl_program_key_t *)keya;
3684     const glsl_program_key_t *kb = (const glsl_program_key_t *)keyb;
3685
3686     return ka->vshader == kb->vshader && ka->pshader == kb->pshader &&
3687            (memcmp(&ka->ps_args, &kb->ps_args, sizeof(kb->ps_args)) == 0);
3688 }
3689
3690 static BOOL constant_heap_init(struct constant_heap *heap, unsigned int constant_count)
3691 {
3692     SIZE_T size = (constant_count + 1) * sizeof(*heap->entries) + constant_count * sizeof(*heap->positions);
3693     void *mem = HeapAlloc(GetProcessHeap(), 0, size);
3694
3695     if (!mem)
3696     {
3697         ERR("Failed to allocate memory\n");
3698         return FALSE;
3699     }
3700
3701     heap->entries = mem;
3702     heap->entries[1].version = 0;
3703     heap->positions = (unsigned int *)(heap->entries + constant_count + 1);
3704     heap->size = 1;
3705
3706     return TRUE;
3707 }
3708
3709 static void constant_heap_free(struct constant_heap *heap)
3710 {
3711     HeapFree(GetProcessHeap(), 0, heap->entries);
3712 }
3713
3714 static HRESULT shader_glsl_alloc(IWineD3DDevice *iface) {
3715     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
3716     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
3717     struct shader_glsl_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_glsl_priv));
3718     SIZE_T stack_size = wined3d_log2i(max(GL_LIMITS(vshader_constantsF), GL_LIMITS(pshader_constantsF))) + 1;
3719
3720     priv->stack = HeapAlloc(GetProcessHeap(), 0, stack_size * sizeof(*priv->stack));
3721     if (!priv->stack)
3722     {
3723         ERR("Failed to allocate memory.\n");
3724         HeapFree(GetProcessHeap(), 0, priv);
3725         return E_OUTOFMEMORY;
3726     }
3727
3728     if (!constant_heap_init(&priv->vconst_heap, GL_LIMITS(vshader_constantsF)))
3729     {
3730         ERR("Failed to initialize vertex shader constant heap\n");
3731         HeapFree(GetProcessHeap(), 0, priv->stack);
3732         HeapFree(GetProcessHeap(), 0, priv);
3733         return E_OUTOFMEMORY;
3734     }
3735
3736     if (!constant_heap_init(&priv->pconst_heap, GL_LIMITS(pshader_constantsF)))
3737     {
3738         ERR("Failed to initialize pixel shader constant heap\n");
3739         constant_heap_free(&priv->vconst_heap);
3740         HeapFree(GetProcessHeap(), 0, priv->stack);
3741         HeapFree(GetProcessHeap(), 0, priv);
3742         return E_OUTOFMEMORY;
3743     }
3744
3745     priv->glsl_program_lookup = hash_table_create(glsl_program_key_hash, glsl_program_key_compare);
3746     priv->next_constant_version = 1;
3747
3748     This->shader_priv = priv;
3749     return WINED3D_OK;
3750 }
3751
3752 static void shader_glsl_free(IWineD3DDevice *iface) {
3753     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
3754     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
3755     struct shader_glsl_priv *priv = (struct shader_glsl_priv *)This->shader_priv;
3756     int i;
3757
3758     for (i = 0; i < tex_type_count; ++i)
3759     {
3760         if (priv->depth_blt_program[i])
3761         {
3762             GL_EXTCALL(glDeleteObjectARB(priv->depth_blt_program[i]));
3763         }
3764     }
3765
3766     hash_table_destroy(priv->glsl_program_lookup, NULL, NULL);
3767     constant_heap_free(&priv->pconst_heap);
3768     constant_heap_free(&priv->vconst_heap);
3769
3770     HeapFree(GetProcessHeap(), 0, This->shader_priv);
3771     This->shader_priv = NULL;
3772 }
3773
3774 static BOOL shader_glsl_dirty_const(IWineD3DDevice *iface) {
3775     /* TODO: GL_EXT_bindable_uniform can be used to share constants across shaders */
3776     return FALSE;
3777 }
3778
3779 static GLuint shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer, const struct ps_compile_args *args) {
3780     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
3781     const struct shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
3782     CONST DWORD *function = This->baseShader.function;
3783     const char *fragcolor;
3784     const WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info;
3785
3786     /* Create the hw GLSL shader object and assign it as the shader->prgId */
3787     GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
3788
3789     shader_addline(buffer, "#version 120\n");
3790
3791     if (GL_SUPPORT(ARB_DRAW_BUFFERS)) {
3792         shader_addline(buffer, "#extension GL_ARB_draw_buffers : enable\n");
3793     }
3794     if (GL_SUPPORT(ARB_TEXTURE_RECTANGLE)) {
3795         /* The spec says that it doesn't have to be explicitly enabled, but the nvidia
3796          * drivers write a warning if we don't do so
3797          */
3798         shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n");
3799     }
3800
3801     /* Base Declarations */
3802     shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, args);
3803
3804     /* Pack 3.0 inputs */
3805     if (reg_maps->shader_version >= WINED3DPS_VERSION(3,0) && args->vp_mode != vertexshader) {
3806         pshader_glsl_input_pack(buffer, This->semantics_in, iface, args->vp_mode);
3807     }
3808
3809     /* Base Shader Body */
3810     shader_generate_main( (IWineD3DBaseShader*) This, buffer, reg_maps, function);
3811
3812     /* Pixel shaders < 2.0 place the resulting color in R0 implicitly */
3813     if (reg_maps->shader_version < WINED3DPS_VERSION(2,0))
3814     {
3815         /* Some older cards like GeforceFX ones don't support multiple buffers, so also not gl_FragData */
3816         if(GL_SUPPORT(ARB_DRAW_BUFFERS))
3817             shader_addline(buffer, "gl_FragData[0] = R0;\n");
3818         else
3819             shader_addline(buffer, "gl_FragColor = R0;\n");
3820     }
3821
3822     if(GL_SUPPORT(ARB_DRAW_BUFFERS)) {
3823         fragcolor = "gl_FragData[0]";
3824     } else {
3825         fragcolor = "gl_FragColor";
3826     }
3827     if(args->srgb_correction) {
3828         shader_addline(buffer, "tmp0.xyz = pow(%s.xyz, vec3(%f, %f, %f)) * vec3(%f, %f, %f) - vec3(%f, %f, %f);\n",
3829                         fragcolor, srgb_pow, srgb_pow, srgb_pow, srgb_mul_high, srgb_mul_high, srgb_mul_high,
3830                         srgb_sub_high, srgb_sub_high, srgb_sub_high);
3831         shader_addline(buffer, "tmp1.xyz = %s.xyz * srgb_mul_low.xyz;\n", fragcolor);
3832         shader_addline(buffer, "%s.x = %s.x < srgb_comparison.x ? tmp1.x : tmp0.x;\n", fragcolor, fragcolor);
3833         shader_addline(buffer, "%s.y = %s.y < srgb_comparison.y ? tmp1.y : tmp0.y;\n", fragcolor, fragcolor);
3834         shader_addline(buffer, "%s.z = %s.z < srgb_comparison.z ? tmp1.z : tmp0.z;\n", fragcolor, fragcolor);
3835         shader_addline(buffer, "%s = clamp(%s, 0.0, 1.0);\n", fragcolor, fragcolor);
3836     }
3837     /* Pixel shader < 3.0 do not replace the fog stage.
3838      * This implements linear fog computation and blending.
3839      * TODO: non linear fog
3840      * NOTE: gl_Fog.start and gl_Fog.end don't hold fog start s and end e but
3841      * -1/(e-s) and e/(e-s) respectively.
3842      */
3843     if(reg_maps->shader_version < WINED3DPS_VERSION(3,0)) {
3844         switch(args->fog) {
3845             case FOG_OFF: break;
3846             case FOG_LINEAR:
3847                 shader_addline(buffer, "float fogstart = -1.0 / (gl_Fog.end - gl_Fog.start);\n");
3848                 shader_addline(buffer, "float fogend = gl_Fog.end * -fogstart;\n");
3849                 shader_addline(buffer, "float Fog = clamp(gl_FogFragCoord * fogstart + fogend, 0.0, 1.0);\n");
3850                 shader_addline(buffer, "%s.xyz = mix(gl_Fog.color.xyz, %s.xyz, Fog);\n", fragcolor, fragcolor);
3851                 break;
3852             case FOG_EXP:
3853                 FIXME("Implement EXP fog in glsl\n");
3854                 break;
3855             case FOG_EXP2:
3856                 FIXME("Implement EXP2 fog in glsl\n");
3857                 break;
3858         }
3859     }
3860
3861     shader_addline(buffer, "}\n");
3862
3863     TRACE("Compiling shader object %u\n", shader_obj);
3864     GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer->buffer, NULL));
3865     GL_EXTCALL(glCompileShaderARB(shader_obj));
3866     print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
3867
3868     /* Store the shader object */
3869     return shader_obj;
3870 }
3871
3872 static void shader_glsl_generate_vshader(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer) {
3873     IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
3874     const struct shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
3875     CONST DWORD *function = This->baseShader.function;
3876     const WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info;
3877
3878     /* Create the hw GLSL shader program and assign it as the shader->prgId */
3879     GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
3880
3881     shader_addline(buffer, "#version 120\n");
3882
3883     /* Base Declarations */
3884     shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, NULL);
3885
3886     /* Base Shader Body */
3887     shader_generate_main( (IWineD3DBaseShader*) This, buffer, reg_maps, function);
3888
3889     /* Unpack 3.0 outputs */
3890     if (reg_maps->shader_version >= WINED3DVS_VERSION(3,0)) shader_addline(buffer, "order_ps_input(OUT);\n");
3891     else shader_addline(buffer, "order_ps_input();\n");
3892
3893     /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
3894     if (!reg_maps->fog)
3895         shader_addline(buffer, "gl_FogFragCoord = gl_Position.z;\n");
3896
3897     /* Write the final position.
3898      *
3899      * OpenGL coordinates specify the center of the pixel while d3d coords specify
3900      * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
3901      * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
3902      * contains 1.0 to allow a mad.
3903      */
3904     shader_addline(buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n");
3905     shader_addline(buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n");
3906
3907     /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
3908      *
3909      * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However, shaders are run
3910      * before the homogeneous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
3911      * which is the same as z = z * 2 - w.
3912      */
3913     shader_addline(buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
3914
3915     shader_addline(buffer, "}\n");
3916
3917     TRACE("Compiling shader object %u\n", shader_obj);
3918     GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer->buffer, NULL));
3919     GL_EXTCALL(glCompileShaderARB(shader_obj));
3920     print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
3921
3922     /* Store the shader object */
3923     This->prgId = shader_obj;
3924 }
3925
3926 static void shader_glsl_get_caps(WINED3DDEVTYPE devtype, const WineD3D_GL_Info *gl_info, struct shader_caps *pCaps)
3927 {
3928     /* Nvidia Geforce6/7 or Ati R4xx/R5xx cards with GLSL support, support VS 3.0 but older Nvidia/Ati
3929      * models with GLSL support only support 2.0. In case of nvidia we can detect VS 2.0 support using
3930      * vs_nv_version which is based on NV_vertex_program.
3931      * For Ati cards there's no way using glsl (it abstracts the lowlevel info away) and also not
3932      * using ARB_vertex_program. It is safe to assume that when a card supports pixel shader 2.0 it
3933      * supports vertex shader 2.0 too and the way around. We can detect ps2.0 using the maximum number
3934      * of native instructions, so use that here. For more info see the pixel shader versioning code below.
3935      */
3936     if((GLINFO_LOCATION.vs_nv_version == VS_VERSION_20) || (GLINFO_LOCATION.ps_arb_max_instructions <= 512))
3937         pCaps->VertexShaderVersion = WINED3DVS_VERSION(2,0);
3938     else
3939         pCaps->VertexShaderVersion = WINED3DVS_VERSION(3,0);
3940     TRACE_(d3d_caps)("Hardware vertex shader version %d.%d enabled (GLSL)\n", (pCaps->VertexShaderVersion >> 8) & 0xff, pCaps->VertexShaderVersion & 0xff);
3941     pCaps->MaxVertexShaderConst = GL_LIMITS(vshader_constantsF);
3942
3943     /* Older DX9-class videocards (GeforceFX / Radeon >9500/X*00) only support pixel shader 2.0/2.0a/2.0b.
3944      * In OpenGL the extensions related to GLSL abstract lowlevel GL info away which is needed
3945      * to distinguish between 2.0 and 3.0 (and 2.0a/2.0b). In case of Nvidia we use their fragment
3946      * program extensions. On other hardware including ATI GL_ARB_fragment_program offers the info
3947      * in max native instructions. Intel and others also offer the info in this extension but they
3948      * don't support GLSL (at least on Windows).
3949      *
3950      * PS2.0 requires at least 96 instructions, 2.0a/2.0b go up to 512. Assume that if the number
3951      * of instructions is 512 or less we have to do with ps2.0 hardware.
3952      * NOTE: ps3.0 hardware requires 512 or more instructions but ati and nvidia offer 'enough' (1024 vs 4096) on their most basic ps3.0 hardware.
3953      */
3954     if((GLINFO_LOCATION.ps_nv_version == PS_VERSION_20) || (GLINFO_LOCATION.ps_arb_max_instructions <= 512))
3955         pCaps->PixelShaderVersion = WINED3DPS_VERSION(2,0);
3956     else
3957         pCaps->PixelShaderVersion = WINED3DPS_VERSION(3,0);
3958
3959     /* FIXME: The following line is card dependent. -8.0 to 8.0 is the
3960      * Direct3D minimum requirement.
3961      *
3962      * Both GL_ARB_fragment_program and GLSL require a "maximum representable magnitude"
3963      * of colors to be 2^10, and 2^32 for other floats. Should we use 1024 here?
3964      *
3965      * The problem is that the refrast clamps temporary results in the shader to
3966      * [-MaxValue;+MaxValue]. If the card's max value is bigger than the one we advertize here,
3967      * then applications may miss the clamping behavior. On the other hand, if it is smaller,
3968      * the shader will generate incorrect results too. Unfortunately, GL deliberately doesn't
3969      * offer a way to query this.
3970      */
3971     pCaps->PixelShader1xMaxValue = 8.0;
3972     TRACE_(d3d_caps)("Hardware pixel shader version %d.%d enabled (GLSL)\n", (pCaps->PixelShaderVersion >> 8) & 0xff, pCaps->PixelShaderVersion & 0xff);
3973 }
3974
3975 static BOOL shader_glsl_color_fixup_supported(struct color_fixup_desc fixup)
3976 {
3977     if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
3978     {
3979         TRACE("Checking support for fixup:\n");
3980         dump_color_fixup_desc(fixup);
3981     }
3982
3983     /* We support everything except YUV conversions. */
3984     if (!is_yuv_fixup(fixup))
3985     {
3986         TRACE("[OK]\n");
3987         return TRUE;
3988     }
3989
3990     TRACE("[FAILED]\n");
3991     return FALSE;
3992 }
3993
3994 static const SHADER_HANDLER shader_glsl_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
3995 {
3996     /* WINED3DSIH_ABS           */ shader_glsl_map2gl,
3997     /* WINED3DSIH_ADD           */ shader_glsl_arith,
3998     /* WINED3DSIH_BEM           */ pshader_glsl_bem,
3999     /* WINED3DSIH_BREAK         */ shader_glsl_break,
4000     /* WINED3DSIH_BREAKC        */ shader_glsl_breakc,
4001     /* WINED3DSIH_BREAKP        */ NULL,
4002     /* WINED3DSIH_CALL          */ shader_glsl_call,
4003     /* WINED3DSIH_CALLNZ        */ shader_glsl_callnz,
4004     /* WINED3DSIH_CMP           */ shader_glsl_cmp,
4005     /* WINED3DSIH_CND           */ shader_glsl_cnd,
4006     /* WINED3DSIH_CRS           */ shader_glsl_cross,
4007     /* WINED3DSIH_DCL           */ NULL,
4008     /* WINED3DSIH_DEF           */ NULL,
4009     /* WINED3DSIH_DEFB          */ NULL,
4010     /* WINED3DSIH_DEFI          */ NULL,
4011     /* WINED3DSIH_DP2ADD        */ pshader_glsl_dp2add,
4012     /* WINED3DSIH_DP3           */ shader_glsl_dot,
4013     /* WINED3DSIH_DP4           */ shader_glsl_dot,
4014     /* WINED3DSIH_DST           */ shader_glsl_dst,
4015     /* WINED3DSIH_DSX           */ shader_glsl_map2gl,
4016     /* WINED3DSIH_DSY           */ shader_glsl_map2gl,
4017     /* WINED3DSIH_ELSE          */ shader_glsl_else,
4018     /* WINED3DSIH_ENDIF         */ shader_glsl_end,
4019     /* WINED3DSIH_ENDLOOP       */ shader_glsl_end,
4020     /* WINED3DSIH_ENDREP        */ shader_glsl_end,
4021     /* WINED3DSIH_EXP           */ shader_glsl_map2gl,
4022     /* WINED3DSIH_EXPP          */ shader_glsl_expp,
4023     /* WINED3DSIH_FRC           */ shader_glsl_map2gl,
4024     /* WINED3DSIH_IF            */ shader_glsl_if,
4025     /* WINED3DSIH_IFC           */ shader_glsl_ifc,
4026     /* WINED3DSIH_LABEL         */ shader_glsl_label,
4027     /* WINED3DSIH_LIT           */ shader_glsl_lit,
4028     /* WINED3DSIH_LOG           */ shader_glsl_log,
4029     /* WINED3DSIH_LOGP          */ shader_glsl_log,
4030     /* WINED3DSIH_LOOP          */ shader_glsl_loop,
4031     /* WINED3DSIH_LRP           */ shader_glsl_lrp,
4032     /* WINED3DSIH_M3x2          */ shader_glsl_mnxn,
4033     /* WINED3DSIH_M3x3          */ shader_glsl_mnxn,
4034     /* WINED3DSIH_M3x4          */ shader_glsl_mnxn,
4035     /* WINED3DSIH_M4x3          */ shader_glsl_mnxn,
4036     /* WINED3DSIH_M4x4          */ shader_glsl_mnxn,
4037     /* WINED3DSIH_MAD           */ shader_glsl_mad,
4038     /* WINED3DSIH_MAX           */ shader_glsl_map2gl,
4039     /* WINED3DSIH_MIN           */ shader_glsl_map2gl,
4040     /* WINED3DSIH_MOV           */ shader_glsl_mov,
4041     /* WINED3DSIH_MOVA          */ shader_glsl_mov,
4042     /* WINED3DSIH_MUL           */ shader_glsl_arith,
4043     /* WINED3DSIH_NOP           */ NULL,
4044     /* WINED3DSIH_NRM           */ shader_glsl_map2gl,
4045     /* WINED3DSIH_PHASE         */ NULL,
4046     /* WINED3DSIH_POW           */ shader_glsl_pow,
4047     /* WINED3DSIH_RCP           */ shader_glsl_rcp,
4048     /* WINED3DSIH_REP           */ shader_glsl_rep,
4049     /* WINED3DSIH_RET           */ NULL,
4050     /* WINED3DSIH_RSQ           */ shader_glsl_rsq,
4051     /* WINED3DSIH_SETP          */ NULL,
4052     /* WINED3DSIH_SGE           */ shader_glsl_compare,
4053     /* WINED3DSIH_SGN           */ shader_glsl_map2gl,
4054     /* WINED3DSIH_SINCOS        */ shader_glsl_sincos,
4055     /* WINED3DSIH_SLT           */ shader_glsl_compare,
4056     /* WINED3DSIH_SUB           */ shader_glsl_arith,
4057     /* WINED3DSIH_TEX           */ pshader_glsl_tex,
4058     /* WINED3DSIH_TEXBEM        */ pshader_glsl_texbem,
4059     /* WINED3DSIH_TEXBEML       */ pshader_glsl_texbem,
4060     /* WINED3DSIH_TEXCOORD      */ pshader_glsl_texcoord,
4061     /* WINED3DSIH_TEXDEPTH      */ pshader_glsl_texdepth,
4062     /* WINED3DSIH_TEXDP3        */ pshader_glsl_texdp3,
4063     /* WINED3DSIH_TEXDP3TEX     */ pshader_glsl_texdp3tex,
4064     /* WINED3DSIH_TEXKILL       */ pshader_glsl_texkill,
4065     /* WINED3DSIH_TEXLDD        */ NULL,
4066     /* WINED3DSIH_TEXLDL        */ shader_glsl_texldl,
4067     /* WINED3DSIH_TEXM3x2DEPTH  */ pshader_glsl_texm3x2depth,
4068     /* WINED3DSIH_TEXM3x2PAD    */ pshader_glsl_texm3x2pad,
4069     /* WINED3DSIH_TEXM3x2TEX    */ pshader_glsl_texm3x2tex,
4070     /* WINED3DSIH_TEXM3x3       */ pshader_glsl_texm3x3,
4071     /* WINED3DSIH_TEXM3x3DIFF   */ NULL,
4072     /* WINED3DSIH_TEXM3x3PAD    */ pshader_glsl_texm3x3pad,
4073     /* WINED3DSIH_TEXM3x3SPEC   */ pshader_glsl_texm3x3spec,
4074     /* WINED3DSIH_TEXM3x3TEX    */ pshader_glsl_texm3x3tex,
4075     /* WINED3DSIH_TEXM3x3VSPEC  */ pshader_glsl_texm3x3vspec,
4076     /* WINED3DSIH_TEXREG2AR     */ pshader_glsl_texreg2ar,
4077     /* WINED3DSIH_TEXREG2GB     */ pshader_glsl_texreg2gb,
4078     /* WINED3DSIH_TEXREG2RGB    */ pshader_glsl_texreg2rgb,
4079 };
4080
4081 const shader_backend_t glsl_shader_backend = {
4082     shader_glsl_instruction_handler_table,
4083     shader_glsl_select,
4084     shader_glsl_select_depth_blt,
4085     shader_glsl_deselect_depth_blt,
4086     shader_glsl_update_float_vertex_constants,
4087     shader_glsl_update_float_pixel_constants,
4088     shader_glsl_load_constants,
4089     shader_glsl_color_correction,
4090     shader_glsl_destroy,
4091     shader_glsl_alloc,
4092     shader_glsl_free,
4093     shader_glsl_dirty_const,
4094     shader_glsl_generate_pshader,
4095     shader_glsl_generate_vshader,
4096     shader_glsl_get_caps,
4097     shader_glsl_color_fixup_supported,
4098 };