crypt32: Add tests for encoding/decoding cert policy constraints.
[wine] / dlls / wined3d / glsl_shader.c
1 /*
2  * GLSL pixel and vertex shader implementation
3  *
4  * Copyright 2006 Jason Green
5  * Copyright 2006-2007 Henri Verbeet
6  * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
7  * Copyright 2009 Henri Verbeet for CodeWeavers
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22  */
23
24 /*
25  * D3D shader asm has swizzles on source parameters, and write masks for
26  * destination parameters. GLSL uses swizzles for both. The result of this is
27  * that for example "mov dst.xw, src.zyxw" becomes "dst.xw = src.zw" in GLSL.
28  * Ie, to generate a proper GLSL source swizzle, we need to take the D3D write
29  * mask for the destination parameter into account.
30  */
31
32 #include "config.h"
33 #include <limits.h>
34 #include <stdio.h>
35 #include "wined3d_private.h"
36
37 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
38 WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
39 WINE_DECLARE_DEBUG_CHANNEL(d3d_caps);
40 WINE_DECLARE_DEBUG_CHANNEL(d3d);
41
/* Expression naming the GL info structure in the current scope. Presumably
 * consumed by the GL_EXTCALL() wrapper (TODO confirm); every function in this
 * file that issues GL extension calls has a "gl_info" variable in scope. */
#define GLINFO_LOCATION                 (*gl_info)

/* Bit flags describing a texture sampling operation. They are independent
 * bits and may be OR'ed together. */
#define WINED3D_GLSL_SAMPLE_PROJECTED   (1 << 0)
#define WINED3D_GLSL_SAMPLE_RECT        (1 << 1)
#define WINED3D_GLSL_SAMPLE_LOD         (1 << 2)
#define WINED3D_GLSL_SAMPLE_GRAD        (1 << 3)
48
/* Textual form of a destination parameter. Both members are fixed-size
 * character buffers; presumably reg_name holds the GLSL register expression
 * and mask_str the write mask (e.g. ".xw") - confirm against the code that
 * fills them in. */
typedef struct
{
    char reg_name[150];
    char mask_str[6];
} glsl_dst_param_t;
53
/* Textual form of a source parameter. Both members are fixed-size character
 * buffers; presumably reg_name holds the bare register name and param_str the
 * complete expression including modifiers and swizzle - confirm against the
 * code that fills them in. */
typedef struct
{
    char reg_name[150];
    char param_str[200];
} glsl_src_param_t;
58
59 typedef struct {
60     const char *name;
61     DWORD coord_mask;
62 } glsl_sample_function_t;
63
/* Pending action for a node on the explicit stack used by the constant heap
 * walkers: descend into the left child, descend into the right child, or
 * return to the parent. */
enum heap_node_op
{
    HEAP_NODE_TRAVERSE_LEFT  = 0,
    HEAP_NODE_TRAVERSE_RIGHT = 1,
    HEAP_NODE_POP            = 2,
};
70
/* One slot of a constant heap: a float constant index together with the
 * constant version at which it was last updated. */
struct constant_entry
{
    unsigned int idx;       /* index of the float constant */
    unsigned int version;   /* version stamp of its last update */
};
76
/* Binary heap of float constants ordered by version, newest at the root.
 * The root lives in slot 1, and the children of slot n are 2n and 2n + 1. */
struct constant_heap
{
    /* Heap slots; slot 0 is not used by the walkers. */
    struct constant_entry *entries;
    /* Maps a constant index to the heap slot currently holding it. */
    unsigned int *positions;
    /* Index of the next free slot; slots below it are in use. */
    unsigned int size;
};
83
84 /* GLSL shader private data */
85 struct shader_glsl_priv {
86     struct wined3d_shader_buffer shader_buffer;
87     struct wine_rb_tree program_lookup;
88     struct glsl_shader_prog_link *glsl_program;
89     struct constant_heap vconst_heap;
90     struct constant_heap pconst_heap;
91     unsigned char *stack;
92     GLhandleARB depth_blt_program[tex_type_count];
93     UINT next_constant_version;
94 };
95
96 /* Struct to maintain data about a linked GLSL program */
97 struct glsl_shader_prog_link {
98     struct wine_rb_entry        program_lookup_entry;
99     struct list                 vshader_entry;
100     struct list                 pshader_entry;
101     GLhandleARB                 programId;
102     GLint                       *vuniformF_locations;
103     GLint                       *puniformF_locations;
104     GLint                       vuniformI_locations[MAX_CONST_I];
105     GLint                       puniformI_locations[MAX_CONST_I];
106     GLint                       posFixup_location;
107     GLint                       np2Fixup_location;
108     GLint                       bumpenvmat_location[MAX_TEXTURES];
109     GLint                       luminancescale_location[MAX_TEXTURES];
110     GLint                       luminanceoffset_location[MAX_TEXTURES];
111     GLint                       ycorrection_location;
112     GLenum                      vertex_color_clamp;
113     IWineD3DVertexShader        *vshader;
114     IWineD3DPixelShader         *pshader;
115     struct vs_compile_args      vs_args;
116     struct ps_compile_args      ps_args;
117     UINT                        constant_version;
118     const struct ps_np2fixup_info *np2Fixup_info;
119 };
120
121 typedef struct {
122     IWineD3DVertexShader        *vshader;
123     IWineD3DPixelShader         *pshader;
124     struct ps_compile_args      ps_args;
125     struct vs_compile_args      vs_args;
126 } glsl_program_key_t;
127
/* Per-compilation context handed to the GLSL code generator: the compile
 * arguments of the shader being generated, plus the NP2 fixup info. */
struct shader_glsl_ctx_priv
{
    const struct vs_compile_args *cur_vs_args;
    const struct ps_compile_args *cur_ps_args;
    struct ps_np2fixup_info *cur_np2fixup_info;
};
133
134 struct glsl_ps_compiled_shader
135 {
136     struct ps_compile_args          args;
137     struct ps_np2fixup_info         np2fixup;
138     GLhandleARB                     prgId;
139 };
140
141 struct glsl_pshader_private
142 {
143     struct glsl_ps_compiled_shader  *gl_shaders;
144     UINT                            num_gl_shaders, shader_array_size;
145 };
146
147 struct glsl_vs_compiled_shader
148 {
149     struct vs_compile_args          args;
150     GLhandleARB                     prgId;
151 };
152
153 struct glsl_vshader_private
154 {
155     struct glsl_vs_compiled_shader  *gl_shaders;
156     UINT                            num_gl_shaders, shader_array_size;
157 };
158
/* Return the next line of an info log and advance *ptr past it.
 *
 * The line terminator, if any, is overwritten with '\0', so the buffer *ptr
 * points into is modified. A final line without a trailing newline is
 * returned as-is. Returns NULL once *ptr points at the end of the string. */
static char *get_info_log_line(char **ptr)
{
    char *line = *ptr;
    char *eol = strchr(line, '\n');

    if (eol)
    {
        /* Terminate this line in place and continue after the newline. */
        *eol = '\0';
        *ptr = eol + 1;
        return line;
    }

    /* No newline left: either we are at the end, or this is the last line. */
    if (*line == '\0') return NULL;

    *ptr = line + strlen(line);
    return line;
}
177
178 /** Prints the GLSL info log which will contain error messages if they exist */
179 /* GL locking is done by the caller */
180 static void print_glsl_info_log(const struct wined3d_gl_info *gl_info, GLhandleARB obj)
181 {
182     int infologLength = 0;
183     char *infoLog;
184     unsigned int i;
185     BOOL is_spam;
186
187     static const char * const spam[] =
188     {
189         "Vertex shader was successfully compiled to run on hardware.\n",    /* fglrx          */
190         "Fragment shader was successfully compiled to run on hardware.\n",  /* fglrx, with \n */
191         "Fragment shader was successfully compiled to run on hardware.",    /* fglrx, no \n   */
192         "Fragment shader(s) linked, vertex shader(s) linked. \n ",          /* fglrx, with \n */
193         "Fragment shader(s) linked, vertex shader(s) linked.",              /* fglrx, no \n   */
194         "Vertex shader(s) linked, no fragment shader(s) defined. \n ",      /* fglrx, with \n */
195         "Vertex shader(s) linked, no fragment shader(s) defined.",          /* fglrx, no \n   */
196         "Fragment shader(s) linked, no vertex shader(s) defined. \n ",      /* fglrx, with \n */
197         "Fragment shader(s) linked, no vertex shader(s) defined.",          /* fglrx, no \n   */
198     };
199
200     if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader)) return;
201
202     GL_EXTCALL(glGetObjectParameterivARB(obj,
203                GL_OBJECT_INFO_LOG_LENGTH_ARB,
204                &infologLength));
205
206     /* A size of 1 is just a null-terminated string, so the log should be bigger than
207      * that if there are errors. */
208     if (infologLength > 1)
209     {
210         char *ptr, *line;
211
212         /* Fglrx doesn't terminate the string properly, but it tells us the proper length.
213          * So use HEAP_ZERO_MEMORY to avoid uninitialized bytes
214          */
215         infoLog = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, infologLength);
216         GL_EXTCALL(glGetInfoLogARB(obj, infologLength, NULL, infoLog));
217         is_spam = FALSE;
218
219         for(i = 0; i < sizeof(spam) / sizeof(spam[0]); i++) {
220             if(strcmp(infoLog, spam[i]) == 0) {
221                 is_spam = TRUE;
222                 break;
223             }
224         }
225
226         ptr = infoLog;
227         if (is_spam)
228         {
229             TRACE("Spam received from GLSL shader #%u:\n", obj);
230             while ((line = get_info_log_line(&ptr))) TRACE("    %s\n", line);
231         }
232         else
233         {
234             FIXME("Error received from GLSL shader #%u:\n", obj);
235             while ((line = get_info_log_line(&ptr))) FIXME("    %s\n", line);
236         }
237         HeapFree(GetProcessHeap(), 0, infoLog);
238     }
239 }
240
241 /**
242  * Loads (pixel shader) samplers
243  */
244 /* GL locking is done by the caller */
245 static void shader_glsl_load_psamplers(const struct wined3d_gl_info *gl_info,
246         DWORD *tex_unit_map, GLhandleARB programId)
247 {
248     GLint name_loc;
249     int i;
250     char sampler_name[20];
251
252     for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) {
253         snprintf(sampler_name, sizeof(sampler_name), "Psampler%d", i);
254         name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
255         if (name_loc != -1) {
256             DWORD mapped_unit = tex_unit_map[i];
257             if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.fragment_samplers)
258             {
259                 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
260                 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
261                 checkGLcall("glUniform1iARB");
262             } else {
263                 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
264             }
265         }
266     }
267 }
268
269 /* GL locking is done by the caller */
270 static void shader_glsl_load_vsamplers(const struct wined3d_gl_info *gl_info,
271         DWORD *tex_unit_map, GLhandleARB programId)
272 {
273     GLint name_loc;
274     char sampler_name[20];
275     int i;
276
277     for (i = 0; i < MAX_VERTEX_SAMPLERS; ++i) {
278         snprintf(sampler_name, sizeof(sampler_name), "Vsampler%d", i);
279         name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
280         if (name_loc != -1) {
281             DWORD mapped_unit = tex_unit_map[MAX_FRAGMENT_SAMPLERS + i];
282             if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.combined_samplers)
283             {
284                 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
285                 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
286                 checkGLcall("glUniform1iARB");
287             } else {
288                 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
289             }
290         }
291     }
292 }
293
294 /* GL locking is done by the caller */
295 static inline void walk_constant_heap(const struct wined3d_gl_info *gl_info, const float *constants,
296         const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
297 {
298     int stack_idx = 0;
299     unsigned int heap_idx = 1;
300     unsigned int idx;
301
302     if (heap->entries[heap_idx].version <= version) return;
303
304     idx = heap->entries[heap_idx].idx;
305     if (constant_locations[idx] != -1) GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
306     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
307
308     while (stack_idx >= 0)
309     {
310         /* Note that we fall through to the next case statement. */
311         switch(stack[stack_idx])
312         {
313             case HEAP_NODE_TRAVERSE_LEFT:
314             {
315                 unsigned int left_idx = heap_idx << 1;
316                 if (left_idx < heap->size && heap->entries[left_idx].version > version)
317                 {
318                     heap_idx = left_idx;
319                     idx = heap->entries[heap_idx].idx;
320                     if (constant_locations[idx] != -1)
321                         GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
322
323                     stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
324                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
325                     break;
326                 }
327             }
328
329             case HEAP_NODE_TRAVERSE_RIGHT:
330             {
331                 unsigned int right_idx = (heap_idx << 1) + 1;
332                 if (right_idx < heap->size && heap->entries[right_idx].version > version)
333                 {
334                     heap_idx = right_idx;
335                     idx = heap->entries[heap_idx].idx;
336                     if (constant_locations[idx] != -1)
337                         GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
338
339                     stack[stack_idx++] = HEAP_NODE_POP;
340                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
341                     break;
342                 }
343             }
344
345             case HEAP_NODE_POP:
346             {
347                 heap_idx >>= 1;
348                 --stack_idx;
349                 break;
350             }
351         }
352     }
353     checkGLcall("walk_constant_heap()");
354 }
355
356 /* GL locking is done by the caller */
357 static inline void apply_clamped_constant(const struct wined3d_gl_info *gl_info, GLint location, const GLfloat *data)
358 {
359     GLfloat clamped_constant[4];
360
361     if (location == -1) return;
362
363     clamped_constant[0] = data[0] < -1.0f ? -1.0f : data[0] > 1.0f ? 1.0f : data[0];
364     clamped_constant[1] = data[1] < -1.0f ? -1.0f : data[1] > 1.0f ? 1.0f : data[1];
365     clamped_constant[2] = data[2] < -1.0f ? -1.0f : data[2] > 1.0f ? 1.0f : data[2];
366     clamped_constant[3] = data[3] < -1.0f ? -1.0f : data[3] > 1.0f ? 1.0f : data[3];
367
368     GL_EXTCALL(glUniform4fvARB(location, 1, clamped_constant));
369 }
370
371 /* GL locking is done by the caller */
372 static inline void walk_constant_heap_clamped(const struct wined3d_gl_info *gl_info, const float *constants,
373         const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
374 {
375     int stack_idx = 0;
376     unsigned int heap_idx = 1;
377     unsigned int idx;
378
379     if (heap->entries[heap_idx].version <= version) return;
380
381     idx = heap->entries[heap_idx].idx;
382     apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
383     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
384
385     while (stack_idx >= 0)
386     {
387         /* Note that we fall through to the next case statement. */
388         switch(stack[stack_idx])
389         {
390             case HEAP_NODE_TRAVERSE_LEFT:
391             {
392                 unsigned int left_idx = heap_idx << 1;
393                 if (left_idx < heap->size && heap->entries[left_idx].version > version)
394                 {
395                     heap_idx = left_idx;
396                     idx = heap->entries[heap_idx].idx;
397                     apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
398
399                     stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
400                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
401                     break;
402                 }
403             }
404
405             case HEAP_NODE_TRAVERSE_RIGHT:
406             {
407                 unsigned int right_idx = (heap_idx << 1) + 1;
408                 if (right_idx < heap->size && heap->entries[right_idx].version > version)
409                 {
410                     heap_idx = right_idx;
411                     idx = heap->entries[heap_idx].idx;
412                     apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
413
414                     stack[stack_idx++] = HEAP_NODE_POP;
415                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
416                     break;
417                 }
418             }
419
420             case HEAP_NODE_POP:
421             {
422                 heap_idx >>= 1;
423                 --stack_idx;
424                 break;
425             }
426         }
427     }
428     checkGLcall("walk_constant_heap_clamped()");
429 }
430
431 /* Loads floating point constants (aka uniforms) into the currently set GLSL program. */
432 /* GL locking is done by the caller */
433 static void shader_glsl_load_constantsF(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
434         const float *constants, const GLint *constant_locations, const struct constant_heap *heap,
435         unsigned char *stack, UINT version)
436 {
437     const local_constant *lconst;
438
439     /* 1.X pshaders have the constants clamped to [-1;1] implicitly. */
440     if (This->baseShader.reg_maps.shader_version.major == 1
441             && shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type))
442         walk_constant_heap_clamped(gl_info, constants, constant_locations, heap, stack, version);
443     else
444         walk_constant_heap(gl_info, constants, constant_locations, heap, stack, version);
445
446     if (!This->baseShader.load_local_constsF)
447     {
448         TRACE("No need to load local float constants for this shader\n");
449         return;
450     }
451
452     /* Immediate constants are clamped to [-1;1] at shader creation time if needed */
453     LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry)
454     {
455         GLint location = constant_locations[lconst->idx];
456         /* We found this uniform name in the program - go ahead and send the data */
457         if (location != -1) GL_EXTCALL(glUniform4fvARB(location, 1, (const GLfloat *)lconst->value));
458     }
459     checkGLcall("glUniform4fvARB()");
460 }
461
462 /* Loads integer constants (aka uniforms) into the currently set GLSL program. */
463 /* GL locking is done by the caller */
464 static void shader_glsl_load_constantsI(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
465         const GLint locations[MAX_CONST_I], const int *constants, WORD constants_set)
466 {
467     unsigned int i;
468     struct list* ptr;
469
470     for (i = 0; constants_set; constants_set >>= 1, ++i)
471     {
472         if (!(constants_set & 1)) continue;
473
474         TRACE_(d3d_constants)("Loading constants %u: %i, %i, %i, %i\n",
475                 i, constants[i*4], constants[i*4+1], constants[i*4+2], constants[i*4+3]);
476
477         /* We found this uniform name in the program - go ahead and send the data */
478         GL_EXTCALL(glUniform4ivARB(locations[i], 1, &constants[i*4]));
479         checkGLcall("glUniform4ivARB");
480     }
481
482     /* Load immediate constants */
483     ptr = list_head(&This->baseShader.constantsI);
484     while (ptr) {
485         const struct local_constant *lconst = LIST_ENTRY(ptr, const struct local_constant, entry);
486         unsigned int idx = lconst->idx;
487         const GLint *values = (const GLint *)lconst->value;
488
489         TRACE_(d3d_constants)("Loading local constants %i: %i, %i, %i, %i\n", idx,
490             values[0], values[1], values[2], values[3]);
491
492         /* We found this uniform name in the program - go ahead and send the data */
493         GL_EXTCALL(glUniform4ivARB(locations[idx], 1, values));
494         checkGLcall("glUniform4ivARB");
495         ptr = list_next(&This->baseShader.constantsI, ptr);
496     }
497 }
498
499 /* Loads boolean constants (aka uniforms) into the currently set GLSL program. */
500 /* GL locking is done by the caller */
501 static void shader_glsl_load_constantsB(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
502         GLhandleARB programId, const BOOL *constants, WORD constants_set)
503 {
504     GLint tmp_loc;
505     unsigned int i;
506     char tmp_name[8];
507     const char *prefix;
508     struct list* ptr;
509
510     switch (This->baseShader.reg_maps.shader_version.type)
511     {
512         case WINED3D_SHADER_TYPE_VERTEX:
513             prefix = "VB";
514             break;
515
516         case WINED3D_SHADER_TYPE_GEOMETRY:
517             prefix = "GB";
518             break;
519
520         case WINED3D_SHADER_TYPE_PIXEL:
521             prefix = "PB";
522             break;
523
524         default:
525             FIXME("Unknown shader type %#x.\n",
526                     This->baseShader.reg_maps.shader_version.type);
527             prefix = "UB";
528             break;
529     }
530
531     /* TODO: Benchmark and see if it would be beneficial to store the
532      * locations of the constants to avoid looking up each time */
533     for (i = 0; constants_set; constants_set >>= 1, ++i)
534     {
535         if (!(constants_set & 1)) continue;
536
537         TRACE_(d3d_constants)("Loading constants %i: %i;\n", i, constants[i]);
538
539         /* TODO: Benchmark and see if it would be beneficial to store the
540          * locations of the constants to avoid looking up each time */
541         snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, i);
542         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
543         if (tmp_loc != -1)
544         {
545             /* We found this uniform name in the program - go ahead and send the data */
546             GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, &constants[i]));
547             checkGLcall("glUniform1ivARB");
548         }
549     }
550
551     /* Load immediate constants */
552     ptr = list_head(&This->baseShader.constantsB);
553     while (ptr) {
554         const struct local_constant *lconst = LIST_ENTRY(ptr, const struct local_constant, entry);
555         unsigned int idx = lconst->idx;
556         const GLint *values = (const GLint *)lconst->value;
557
558         TRACE_(d3d_constants)("Loading local constants %i: %i\n", idx, values[0]);
559
560         snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, idx);
561         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
562         if (tmp_loc != -1) {
563             /* We found this uniform name in the program - go ahead and send the data */
564             GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, values));
565             checkGLcall("glUniform1ivARB");
566         }
567         ptr = list_next(&This->baseShader.constantsB, ptr);
568     }
569 }
570
571 static void reset_program_constant_version(struct wine_rb_entry *entry, void *context)
572 {
573     WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry)->constant_version = 0;
574 }
575
576 /**
577  * Loads the texture dimensions for NP2 fixup into the currently set GLSL program.
578  */
579 /* GL locking is done by the caller (state handler) */
580 static void shader_glsl_load_np2fixup_constants(
581     IWineD3DDevice* device,
582     char usePixelShader,
583     char useVertexShader) {
584
585     const IWineD3DDeviceImpl* deviceImpl = (const IWineD3DDeviceImpl*) device;
586     const struct glsl_shader_prog_link* prog = ((struct shader_glsl_priv *)(deviceImpl->shader_priv))->glsl_program;
587
588     if (!prog) {
589         /* No GLSL program set - nothing to do. */
590         return;
591     }
592
593     if (!usePixelShader) {
594         /* NP2 texcoord fixup is (currently) only done for pixelshaders. */
595         return;
596     }
597
598     if (prog->ps_args.np2_fixup && -1 != prog->np2Fixup_location) {
599         const struct wined3d_gl_info *gl_info = &deviceImpl->adapter->gl_info;
600         const IWineD3DStateBlockImpl* stateBlock = (const IWineD3DStateBlockImpl*) deviceImpl->stateBlock;
601         UINT i;
602         UINT fixup = prog->ps_args.np2_fixup;
603         GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS];
604
605         for (i = 0; fixup; fixup >>= 1, ++i) {
606             const unsigned char idx = prog->np2Fixup_info->idx[i];
607             const IWineD3DBaseTextureImpl* const tex = (const IWineD3DBaseTextureImpl*) stateBlock->textures[i];
608             GLfloat* tex_dim = &np2fixup_constants[(idx >> 1) * 4];
609
610             if (!tex) {
611                 FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n");
612                 continue;
613             }
614
615             if (idx % 2) {
616                 tex_dim[2] = tex->baseTexture.pow2Matrix[0]; tex_dim[3] = tex->baseTexture.pow2Matrix[5];
617             } else {
618                 tex_dim[0] = tex->baseTexture.pow2Matrix[0]; tex_dim[1] = tex->baseTexture.pow2Matrix[5];
619             }
620         }
621
622         GL_EXTCALL(glUniform4fvARB(prog->np2Fixup_location, prog->np2Fixup_info->num_consts, np2fixup_constants));
623     }
624 }
625
626 /**
627  * Loads the app-supplied constants into the currently set GLSL program.
628  */
629 /* GL locking is done by the caller (state handler) */
630 static void shader_glsl_load_constants(const struct wined3d_context *context,
631         char usePixelShader, char useVertexShader)
632 {
633     IWineD3DDeviceImpl *device = ((IWineD3DSurfaceImpl *)context->surface)->resource.wineD3DDevice;
634     const struct wined3d_gl_info *gl_info = context->gl_info;
635     IWineD3DStateBlockImpl* stateBlock = device->stateBlock;
636     struct shader_glsl_priv *priv = device->shader_priv;
637
638     GLhandleARB programId;
639     struct glsl_shader_prog_link *prog = priv->glsl_program;
640     UINT constant_version;
641     int i;
642
643     if (!prog) {
644         /* No GLSL program set - nothing to do. */
645         return;
646     }
647     programId = prog->programId;
648     constant_version = prog->constant_version;
649
650     if (useVertexShader) {
651         IWineD3DBaseShaderImpl* vshader = (IWineD3DBaseShaderImpl*) stateBlock->vertexShader;
652
653         /* Load DirectX 9 float constants/uniforms for vertex shader */
654         shader_glsl_load_constantsF(vshader, gl_info, stateBlock->vertexShaderConstantF,
655                 prog->vuniformF_locations, &priv->vconst_heap, priv->stack, constant_version);
656
657         /* Load DirectX 9 integer constants/uniforms for vertex shader */
658         shader_glsl_load_constantsI(vshader, gl_info, prog->vuniformI_locations, stateBlock->vertexShaderConstantI,
659                 stateBlock->changed.vertexShaderConstantsI & vshader->baseShader.reg_maps.integer_constants);
660
661         /* Load DirectX 9 boolean constants/uniforms for vertex shader */
662         shader_glsl_load_constantsB(vshader, gl_info, programId, stateBlock->vertexShaderConstantB,
663                 stateBlock->changed.vertexShaderConstantsB & vshader->baseShader.reg_maps.boolean_constants);
664
665         /* Upload the position fixup params */
666         GL_EXTCALL(glUniform4fvARB(prog->posFixup_location, 1, &device->posFixup[0]));
667         checkGLcall("glUniform4fvARB");
668     }
669
670     if (usePixelShader) {
671
672         IWineD3DBaseShaderImpl* pshader = (IWineD3DBaseShaderImpl*) stateBlock->pixelShader;
673
674         /* Load DirectX 9 float constants/uniforms for pixel shader */
675         shader_glsl_load_constantsF(pshader, gl_info, stateBlock->pixelShaderConstantF,
676                 prog->puniformF_locations, &priv->pconst_heap, priv->stack, constant_version);
677
678         /* Load DirectX 9 integer constants/uniforms for pixel shader */
679         shader_glsl_load_constantsI(pshader, gl_info, prog->puniformI_locations, stateBlock->pixelShaderConstantI,
680                 stateBlock->changed.pixelShaderConstantsI & pshader->baseShader.reg_maps.integer_constants);
681
682         /* Load DirectX 9 boolean constants/uniforms for pixel shader */
683         shader_glsl_load_constantsB(pshader, gl_info, programId, stateBlock->pixelShaderConstantB,
684                 stateBlock->changed.pixelShaderConstantsB & pshader->baseShader.reg_maps.boolean_constants);
685
686         /* Upload the environment bump map matrix if needed. The needsbumpmat member specifies the texture stage to load the matrix from.
687          * It can't be 0 for a valid texbem instruction.
688          */
689         for(i = 0; i < MAX_TEXTURES; i++) {
690             const float *data;
691
692             if(prog->bumpenvmat_location[i] == -1) continue;
693
694             data = (const float *)&stateBlock->textureState[i][WINED3DTSS_BUMPENVMAT00];
695             GL_EXTCALL(glUniformMatrix2fvARB(prog->bumpenvmat_location[i], 1, 0, data));
696             checkGLcall("glUniformMatrix2fvARB");
697
698             /* texbeml needs the luminance scale and offset too. If texbeml is used, needsbumpmat
699              * is set too, so we can check that in the needsbumpmat check
700              */
701             if(prog->luminancescale_location[i] != -1) {
702                 const GLfloat *scale = (const GLfloat *)&stateBlock->textureState[i][WINED3DTSS_BUMPENVLSCALE];
703                 const GLfloat *offset = (const GLfloat *)&stateBlock->textureState[i][WINED3DTSS_BUMPENVLOFFSET];
704
705                 GL_EXTCALL(glUniform1fvARB(prog->luminancescale_location[i], 1, scale));
706                 checkGLcall("glUniform1fvARB");
707                 GL_EXTCALL(glUniform1fvARB(prog->luminanceoffset_location[i], 1, offset));
708                 checkGLcall("glUniform1fvARB");
709             }
710         }
711
712         if(((IWineD3DPixelShaderImpl *) pshader)->vpos_uniform) {
713             float correction_params[4];
714
715             if (context->render_offscreen)
716             {
717                 correction_params[0] = 0.0f;
718                 correction_params[1] = 1.0f;
719             } else {
720                 /* position is window relative, not viewport relative */
721                 correction_params[0] = ((IWineD3DSurfaceImpl *)context->current_rt)->currentDesc.Height;
722                 correction_params[1] = -1.0f;
723             }
724             GL_EXTCALL(glUniform4fvARB(prog->ycorrection_location, 1, correction_params));
725         }
726     }
727
728     if (priv->next_constant_version == UINT_MAX)
729     {
730         TRACE("Max constant version reached, resetting to 0.\n");
731         wine_rb_for_each_entry(&priv->program_lookup, reset_program_constant_version, NULL);
732         priv->next_constant_version = 1;
733     }
734     else
735     {
736         prog->constant_version = priv->next_constant_version++;
737     }
738 }
739
740 static inline void update_heap_entry(struct constant_heap *heap, unsigned int idx,
741         unsigned int heap_idx, DWORD new_version)
742 {
743     struct constant_entry *entries = heap->entries;
744     unsigned int *positions = heap->positions;
745     unsigned int parent_idx;
746
747     while (heap_idx > 1)
748     {
749         parent_idx = heap_idx >> 1;
750
751         if (new_version <= entries[parent_idx].version) break;
752
753         entries[heap_idx] = entries[parent_idx];
754         positions[entries[parent_idx].idx] = heap_idx;
755         heap_idx = parent_idx;
756     }
757
758     entries[heap_idx].version = new_version;
759     entries[heap_idx].idx = idx;
760     positions[idx] = heap_idx;
761 }
762
763 static void shader_glsl_update_float_vertex_constants(IWineD3DDevice *iface, UINT start, UINT count)
764 {
765     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
766     struct shader_glsl_priv *priv = This->shader_priv;
767     struct constant_heap *heap = &priv->vconst_heap;
768     UINT i;
769
770     for (i = start; i < count + start; ++i)
771     {
772         if (!This->stateBlock->changed.vertexShaderConstantsF[i])
773             update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
774         else
775             update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
776     }
777 }
778
779 static void shader_glsl_update_float_pixel_constants(IWineD3DDevice *iface, UINT start, UINT count)
780 {
781     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
782     struct shader_glsl_priv *priv = This->shader_priv;
783     struct constant_heap *heap = &priv->pconst_heap;
784     UINT i;
785
786     for (i = start; i < count + start; ++i)
787     {
788         if (!This->stateBlock->changed.pixelShaderConstantsF[i])
789             update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
790         else
791             update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
792     }
793 }
794
795 static unsigned int vec4_varyings(DWORD shader_major, const struct wined3d_gl_info *gl_info)
796 {
797     unsigned int ret = gl_info->limits.glsl_varyings / 4;
798     /* 4.0 shaders do not write clip coords because d3d10 does not support user clipplanes */
799     if(shader_major > 3) return ret;
800
801     /* 3.0 shaders may need an extra varying for the clip coord on some cards(mostly dx10 ones) */
802     if (gl_info->quirks & WINED3D_QUIRK_GLSL_CLIP_VARYING) ret -= 1;
803     return ret;
804 }
805
806 /** Generate the variable & register declarations for the GLSL output target */
807 static void shader_generate_glsl_declarations(const struct wined3d_context *context,
808         struct wined3d_shader_buffer *buffer, IWineD3DBaseShader *iface,
809         const shader_reg_maps *reg_maps, struct shader_glsl_ctx_priv *ctx_priv)
810 {
811     IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
812     IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) This->baseShader.device;
813     const struct ps_compile_args *ps_args = ctx_priv->cur_ps_args;
814     const struct wined3d_gl_info *gl_info = context->gl_info;
815     unsigned int i, extra_constants_needed = 0;
816     const local_constant *lconst;
817     DWORD map;
818
819     /* There are some minor differences between pixel and vertex shaders */
820     char pshader = shader_is_pshader_version(reg_maps->shader_version.type);
821     char prefix = pshader ? 'P' : 'V';
822
823     /* Prototype the subroutines */
824     for (i = 0, map = reg_maps->labels; map; map >>= 1, ++i)
825     {
826         if (map & 1) shader_addline(buffer, "void subroutine%u();\n", i);
827     }
828
829     /* Declare the constants (aka uniforms) */
830     if (This->baseShader.limits.constant_float > 0) {
831         unsigned max_constantsF;
832         /* Unless the shader uses indirect addressing, always declare the maximum array size and ignore that we need some
833          * uniforms privately. E.g. if GL supports 256 uniforms, and we need 2 for the pos fixup and immediate values, still
834          * declare VC[256]. If the shader needs more uniforms than we have it won't work in any case. If it uses less, the
835          * compiler will figure out which uniforms are really used and strip them out. This allows a shader to use c255 on
836          * a dx9 card, as long as it doesn't also use all the other constants.
837          *
838          * If the shader uses indirect addressing the compiler must assume that all declared uniforms are used. In this case,
839          * declare only the amount that we're assured to have.
840          *
841          * Thus we run into problems in these two cases:
842          * 1) The shader really uses more uniforms than supported
843          * 2) The shader uses indirect addressing, less constants than supported, but uses a constant index > #supported consts
844          */
845         if (pshader)
846         {
847             /* No indirect addressing here. */
848             max_constantsF = gl_info->limits.glsl_ps_float_constants;
849         }
850         else
851         {
852             if(This->baseShader.reg_maps.usesrelconstF) {
853                 /* Subtract the other potential uniforms from the max available (bools, ints, and 1 row of projection matrix).
854                  * Subtract another uniform for immediate values, which have to be loaded via uniform by the driver as well.
855                  * The shader code only uses 0.5, 2.0, 1.0, 128 and -128 in vertex shader code, so one vec4 should be enough
856                  * (Unfortunately the Nvidia driver doesn't store 128 and -128 in one float).
857                  *
858                  * Writing gl_ClipPos requires one uniform for each clipplane as well.
859                  */
860                 max_constantsF = gl_info->limits.glsl_vs_float_constants - 3 - gl_info->limits.clipplanes;
861                 max_constantsF -= count_bits(This->baseShader.reg_maps.integer_constants);
862                 /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
863                  * so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
864                  * for now take this into account when calculating the number of available constants
865                  */
866                 max_constantsF -= count_bits(This->baseShader.reg_maps.boolean_constants);
867                 /* Set by driver quirks in directx.c */
868                 max_constantsF -= gl_info->reserved_glsl_constants;
869             }
870             else
871             {
872                 max_constantsF = gl_info->limits.glsl_vs_float_constants;
873             }
874         }
875         max_constantsF = min(This->baseShader.limits.constant_float, max_constantsF);
876         shader_addline(buffer, "uniform vec4 %cC[%u];\n", prefix, max_constantsF);
877     }
878
879     /* Always declare the full set of constants, the compiler can remove the unused ones because d3d doesn't(yet)
880      * support indirect int and bool constant addressing. This avoids problems if the app uses e.g. i0 and i9.
881      */
882     if (This->baseShader.limits.constant_int > 0 && This->baseShader.reg_maps.integer_constants)
883         shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, This->baseShader.limits.constant_int);
884
885     if (This->baseShader.limits.constant_bool > 0 && This->baseShader.reg_maps.boolean_constants)
886         shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, This->baseShader.limits.constant_bool);
887
888     if(!pshader) {
889         shader_addline(buffer, "uniform vec4 posFixup;\n");
890         /* Predeclaration; This function is added at link time based on the pixel shader.
891          * VS 3.0 shaders have an array OUT[] the shader writes to, earlier versions don't have
892          * that. We know the input to the reorder function at vertex shader compile time, so
893          * we can deal with that. The reorder function for a 1.x and 2.x vertex shader can just
894          * read gl_FrontColor. The output depends on the pixel shader. The reorder function for a
895          * 1.x and 2.x pshader or for fixed function will write gl_FrontColor, and for a 3.0 shader
896          * it will write to the varying array. Here we depend on the shader optimizer on sorting that
897          * out. The nvidia driver only does that if the parameter is inout instead of out, hence the
898          * inout.
899          */
900         if (reg_maps->shader_version.major >= 3)
901         {
902             shader_addline(buffer, "void order_ps_input(in vec4[%u]);\n", MAX_REG_OUTPUT);
903         } else {
904             shader_addline(buffer, "void order_ps_input();\n");
905         }
906     } else {
907         for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i)
908         {
909             if (!(map & 1)) continue;
910
911             shader_addline(buffer, "uniform mat2 bumpenvmat%d;\n", i);
912
913             if (reg_maps->luminanceparams & (1 << i))
914             {
915                 shader_addline(buffer, "uniform float luminancescale%d;\n", i);
916                 shader_addline(buffer, "uniform float luminanceoffset%d;\n", i);
917                 extra_constants_needed++;
918             }
919
920             extra_constants_needed++;
921         }
922
923         if (ps_args->srgb_correction)
924         {
925             shader_addline(buffer, "const vec4 srgb_const0 = vec4(%.8e, %.8e, %.8e, %.8e);\n",
926                     srgb_pow, srgb_mul_high, srgb_sub_high, srgb_mul_low);
927             shader_addline(buffer, "const vec4 srgb_const1 = vec4(%.8e, 0.0, 0.0, 0.0);\n",
928                     srgb_cmp);
929         }
930         if (reg_maps->vpos || reg_maps->usesdsy)
931         {
932             if (This->baseShader.limits.constant_float + extra_constants_needed
933                     + 1 < gl_info->limits.glsl_ps_float_constants)
934             {
935                 shader_addline(buffer, "uniform vec4 ycorrection;\n");
936                 ((IWineD3DPixelShaderImpl *) This)->vpos_uniform = 1;
937                 extra_constants_needed++;
938             } else {
939                 /* This happens because we do not have proper tracking of the constant registers that are
940                  * actually used, only the max limit of the shader version
941                  */
942                 FIXME("Cannot find a free uniform for vpos correction params\n");
943                 shader_addline(buffer, "const vec4 ycorrection = vec4(%f, %f, 0.0, 0.0);\n",
944                         context->render_offscreen ? 0.0f : ((IWineD3DSurfaceImpl *)device->render_targets[0])->currentDesc.Height,
945                         context->render_offscreen ? 1.0f : -1.0f);
946             }
947             shader_addline(buffer, "vec4 vpos;\n");
948         }
949     }
950
951     /* Declare texture samplers */
952     for (i = 0; i < This->baseShader.limits.sampler; i++) {
953         if (reg_maps->sampler_type[i])
954         {
955             switch (reg_maps->sampler_type[i])
956             {
957                 case WINED3DSTT_1D:
958                     shader_addline(buffer, "uniform sampler1D %csampler%u;\n", prefix, i);
959                     break;
960                 case WINED3DSTT_2D:
961                     if(device->stateBlock->textures[i] &&
962                        IWineD3DBaseTexture_GetTextureDimensions(device->stateBlock->textures[i]) == GL_TEXTURE_RECTANGLE_ARB) {
963                         shader_addline(buffer, "uniform sampler2DRect %csampler%u;\n", prefix, i);
964                     } else {
965                         shader_addline(buffer, "uniform sampler2D %csampler%u;\n", prefix, i);
966                     }
967                     break;
968                 case WINED3DSTT_CUBE:
969                     shader_addline(buffer, "uniform samplerCube %csampler%u;\n", prefix, i);
970                     break;
971                 case WINED3DSTT_VOLUME:
972                     shader_addline(buffer, "uniform sampler3D %csampler%u;\n", prefix, i);
973                     break;
974                 default:
975                     shader_addline(buffer, "uniform unsupported_sampler %csampler%u;\n", prefix, i);
976                     FIXME("Unrecognized sampler type: %#x\n", reg_maps->sampler_type[i]);
977                     break;
978             }
979         }
980     }
981
982     /* Declare uniforms for NP2 texcoord fixup:
983      * This is NOT done inside the loop that declares the texture samplers since the NP2 fixup code
984      * is currently only used for the GeforceFX series and when forcing the ARB_npot extension off.
985      * Modern cards just skip the code anyway, so put it inside a separate loop. */
986     if (pshader && ps_args->np2_fixup) {
987
988         struct ps_np2fixup_info* const fixup = ctx_priv->cur_np2fixup_info;
989         UINT cur = 0;
990
991         /* NP2/RECT textures in OpenGL use texcoords in the range [0,width]x[0,height]
992          * while D3D has them in the (normalized) [0,1]x[0,1] range.
993          * samplerNP2Fixup stores texture dimensions and is updated through
994          * shader_glsl_load_np2fixup_constants when the sampler changes. */
995
996         for (i = 0; i < This->baseShader.limits.sampler; ++i) {
997             if (reg_maps->sampler_type[i]) {
998                 if (!(ps_args->np2_fixup & (1 << i))) continue;
999
1000                 if (WINED3DSTT_2D != reg_maps->sampler_type[i]) {
1001                     FIXME("Non-2D texture is flagged for NP2 texcoord fixup.\n");
1002                     continue;
1003                 }
1004
1005                 fixup->idx[i] = cur++;
1006             }
1007         }
1008
1009         fixup->num_consts = (cur + 1) >> 1;
1010         shader_addline(buffer, "uniform vec4 %csamplerNP2Fixup[%u];\n", prefix, fixup->num_consts);
1011     }
1012
1013     /* Declare address variables */
1014     for (i = 0, map = reg_maps->address; map; map >>= 1, ++i)
1015     {
1016         if (map & 1) shader_addline(buffer, "ivec4 A%u;\n", i);
1017     }
1018
1019     /* Declare texture coordinate temporaries and initialize them */
1020     for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i)
1021     {
1022         if (map & 1) shader_addline(buffer, "vec4 T%u = gl_TexCoord[%u];\n", i, i);
1023     }
1024
1025     /* Declare input register varyings. Only pixel shader, vertex shaders have that declared in the
1026      * helper function shader that is linked in at link time
1027      */
1028     if (pshader && reg_maps->shader_version.major >= 3)
1029     {
1030         if (use_vs(device->stateBlock))
1031         {
1032             shader_addline(buffer, "varying vec4 IN[%u];\n", vec4_varyings(reg_maps->shader_version.major, gl_info));
1033         } else {
1034             /* TODO: Write a replacement shader for the fixed function vertex pipeline, so this isn't needed.
1035              * For fixed function vertex processing + 3.0 pixel shader we need a separate function in the
1036              * pixel shader that reads the fixed function color into the packed input registers.
1037              */
1038             shader_addline(buffer, "vec4 IN[%u];\n", vec4_varyings(reg_maps->shader_version.major, gl_info));
1039         }
1040     }
1041
1042     /* Declare output register temporaries */
1043     if(This->baseShader.limits.packed_output) {
1044         shader_addline(buffer, "vec4 OUT[%u];\n", This->baseShader.limits.packed_output);
1045     }
1046
1047     /* Declare temporary variables */
1048     for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
1049     {
1050         if (map & 1) shader_addline(buffer, "vec4 R%u;\n", i);
1051     }
1052
1053     /* Declare attributes */
1054     if (reg_maps->shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
1055     {
1056         for (i = 0, map = reg_maps->input_registers; map; map >>= 1, ++i)
1057         {
1058             if (map & 1) shader_addline(buffer, "attribute vec4 attrib%i;\n", i);
1059         }
1060     }
1061
1062     /* Declare loop registers aLx */
1063     for (i = 0; i < reg_maps->loop_depth; i++) {
1064         shader_addline(buffer, "int aL%u;\n", i);
1065         shader_addline(buffer, "int tmpInt%u;\n", i);
1066     }
1067
1068     /* Temporary variables for matrix operations */
1069     shader_addline(buffer, "vec4 tmp0;\n");
1070     shader_addline(buffer, "vec4 tmp1;\n");
1071
1072     /* Local constants use a different name so they can be loaded once at shader link time
1073      * They can't be hardcoded into the shader text via LC = {x, y, z, w}; because the
1074      * float -> string conversion can cause precision loss.
1075      */
1076     if(!This->baseShader.load_local_constsF) {
1077         LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
1078             shader_addline(buffer, "uniform vec4 %cLC%u;\n", prefix, lconst->idx);
1079         }
1080     }
1081
1082     /* Start the main program */
1083     shader_addline(buffer, "void main() {\n");
1084     if(pshader && reg_maps->vpos) {
1085         /* DirectX apps expect integer values, while OpenGL drivers add approximately 0.5. This causes
1086          * off-by-one problems as spotted by the vPos d3d9 visual test. Unfortunately the ATI cards do
1087          * not add exactly 0.5, but rather something like 0.49999999 or 0.50000001, which still causes
1088          * precision troubles when we just substract 0.5.
1089          *
1090          * To deal with that just floor() the position. This will eliminate the fraction on all cards.
1091          *
1092          * TODO: Test how that behaves with multisampling once we can enable multisampling in winex11.
1093          *
1094          * An advantage of floor is that it works even if the driver doesn't add 1/2. It is somewhat
1095          * questionable if 1.5, 2.5, ... are the proper values to return in gl_FragCoord, even though
1096          * coordinates specify the pixel centers instead of the pixel corners. This code will behave
1097          * correctly on drivers that returns integer values.
1098          */
1099         shader_addline(buffer, "vpos = floor(vec4(0, ycorrection[0], 0, 0) + gl_FragCoord * vec4(1, ycorrection[1], 1, 1));\n");
1100     }
1101 }
1102
1103 /*****************************************************************************
1104  * Functions to generate GLSL strings from DirectX Shader bytecode begin here.
1105  *
1106  * For more information, see http://wiki.winehq.org/DirectX-Shaders
1107  ****************************************************************************/
1108
1109 /* Prototypes */
1110 static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
1111         const struct wined3d_shader_src_param *wined3d_src, DWORD mask, glsl_src_param_t *glsl_src);
1112
/** GLSL multiplier prefixes for opcode result shift modifiers, indexed by the
 * 4 bit shift value. Each non-empty entry is text of the form "<factor> * "
 * that multiplies the result by the given amount. The entries for x64, x128,
 * d256, d128, d64 and d32 are empty strings, i.e. no multiplier is produced
 * for those shifts. */
static const char * const shift_glsl_tab[] =
{
    /* Multiplying shifts */
    "",           /*  0: none */
    "2.0 * ",     /*  1: x2   */
    "4.0 * ",     /*  2: x4   */
    "8.0 * ",     /*  3: x8   */
    "16.0 * ",    /*  4: x16  */
    "32.0 * ",    /*  5: x32  */
    "",           /*  6: x64  */
    "",           /*  7: x128 */
    /* Dividing shifts */
    "",           /*  8: d256 */
    "",           /*  9: d128 */
    "",           /* 10: d64  */
    "",           /* 11: d32  */
    "0.0625 * ",  /* 12: d16  */
    "0.125 * ",   /* 13: d8   */
    "0.25 * ",    /* 14: d4   */
    "0.5 * ",     /* 15: d2   */
};
1132
1133 /* Generate a GLSL parameter that does the input modifier computation and return the input register/mask to use */
1134 static void shader_glsl_gen_modifier(DWORD src_modifier, const char *in_reg, const char *in_regswizzle, char *out_str)
1135 {
1136     out_str[0] = 0;
1137
1138     switch (src_modifier)
1139     {
1140     case WINED3DSPSM_DZ: /* Need to handle this in the instructions itself (texld & texcrd). */
1141     case WINED3DSPSM_DW:
1142     case WINED3DSPSM_NONE:
1143         sprintf(out_str, "%s%s", in_reg, in_regswizzle);
1144         break;
1145     case WINED3DSPSM_NEG:
1146         sprintf(out_str, "-%s%s", in_reg, in_regswizzle);
1147         break;
1148     case WINED3DSPSM_NOT:
1149         sprintf(out_str, "!%s%s", in_reg, in_regswizzle);
1150         break;
1151     case WINED3DSPSM_BIAS:
1152         sprintf(out_str, "(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
1153         break;
1154     case WINED3DSPSM_BIASNEG:
1155         sprintf(out_str, "-(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
1156         break;
1157     case WINED3DSPSM_SIGN:
1158         sprintf(out_str, "(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
1159         break;
1160     case WINED3DSPSM_SIGNNEG:
1161         sprintf(out_str, "-(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
1162         break;
1163     case WINED3DSPSM_COMP:
1164         sprintf(out_str, "(1.0 - %s%s)", in_reg, in_regswizzle);
1165         break;
1166     case WINED3DSPSM_X2:
1167         sprintf(out_str, "(2.0 * %s%s)", in_reg, in_regswizzle);
1168         break;
1169     case WINED3DSPSM_X2NEG:
1170         sprintf(out_str, "-(2.0 * %s%s)", in_reg, in_regswizzle);
1171         break;
1172     case WINED3DSPSM_ABS:
1173         sprintf(out_str, "abs(%s%s)", in_reg, in_regswizzle);
1174         break;
1175     case WINED3DSPSM_ABSNEG:
1176         sprintf(out_str, "-abs(%s%s)", in_reg, in_regswizzle);
1177         break;
1178     default:
1179         FIXME("Unhandled modifier %u\n", src_modifier);
1180         sprintf(out_str, "%s%s", in_reg, in_regswizzle);
1181     }
1182 }
1183
1184 /** Writes the GLSL variable name that corresponds to the register that the
1185  * DX opcode parameter is trying to access */
1186 static void shader_glsl_get_register_name(const struct wined3d_shader_register *reg,
1187         char *register_name, BOOL *is_color, const struct wined3d_shader_instruction *ins)
1188 {
1189     /* oPos, oFog and oPts in D3D */
1190     static const char * const hwrastout_reg_names[] = { "gl_Position", "gl_FogFragCoord", "gl_PointSize" };
1191
1192     IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
1193     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
1194     const struct wined3d_gl_info *gl_info = &deviceImpl->adapter->gl_info;
1195     char pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type);
1196
1197     *is_color = FALSE;
1198
1199     switch (reg->type)
1200     {
1201         case WINED3DSPR_TEMP:
1202             sprintf(register_name, "R%u", reg->idx);
1203             break;
1204
1205         case WINED3DSPR_INPUT:
1206             /* vertex shaders */
1207             if (!pshader)
1208             {
1209                 struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
1210                 if (priv->cur_vs_args->swizzle_map & (1 << reg->idx)) *is_color = TRUE;
1211                 sprintf(register_name, "attrib%u", reg->idx);
1212                 break;
1213             }
1214
1215             /* pixel shaders >= 3.0 */
1216             if (This->baseShader.reg_maps.shader_version.major >= 3)
1217             {
1218                 DWORD idx = ((IWineD3DPixelShaderImpl *)This)->input_reg_map[reg->idx];
1219                 unsigned int in_count = vec4_varyings(This->baseShader.reg_maps.shader_version.major, gl_info);
1220
1221                 if (reg->rel_addr)
1222                 {
1223                     glsl_src_param_t rel_param;
1224
1225                     shader_glsl_add_src_param(ins, reg->rel_addr, WINED3DSP_WRITEMASK_0, &rel_param);
1226
1227                     /* Removing a + 0 would be an obvious optimization, but macos doesn't see the NOP
1228                      * operation there */
1229                     if (idx)
1230                     {
1231                         if (((IWineD3DPixelShaderImpl *)This)->declared_in_count > in_count)
1232                         {
1233                             sprintf(register_name,
1234                                     "((%s + %u) > %d ? (%s + %u) > %d ? gl_SecondaryColor : gl_Color : IN[%s + %u])",
1235                                     rel_param.param_str, idx, in_count - 1, rel_param.param_str, idx, in_count,
1236                                     rel_param.param_str, idx);
1237                         }
1238                         else
1239                         {
1240                             sprintf(register_name, "IN[%s + %u]", rel_param.param_str, idx);
1241                         }
1242                     }
1243                     else
1244                     {
1245                         if (((IWineD3DPixelShaderImpl *)This)->declared_in_count > in_count)
1246                         {
1247                             sprintf(register_name, "((%s) > %d ? (%s) > %d ? gl_SecondaryColor : gl_Color : IN[%s])",
1248                                     rel_param.param_str, in_count - 1, rel_param.param_str, in_count,
1249                                     rel_param.param_str);
1250                         }
1251                         else
1252                         {
1253                             sprintf(register_name, "IN[%s]", rel_param.param_str);
1254                         }
1255                     }
1256                 }
1257                 else
1258                 {
1259                     if (idx == in_count) sprintf(register_name, "gl_Color");
1260                     else if (idx == in_count + 1) sprintf(register_name, "gl_SecondaryColor");
1261                     else sprintf(register_name, "IN[%u]", idx);
1262                 }
1263             }
1264             else
1265             {
1266                 if (reg->idx == 0) strcpy(register_name, "gl_Color");
1267                 else strcpy(register_name, "gl_SecondaryColor");
1268                 break;
1269             }
1270             break;
1271
1272         case WINED3DSPR_CONST:
1273             {
1274                 const char prefix = pshader ? 'P' : 'V';
1275
1276                 /* Relative addressing */
1277                 if (reg->rel_addr)
1278                 {
1279                     glsl_src_param_t rel_param;
1280                     shader_glsl_add_src_param(ins, reg->rel_addr, WINED3DSP_WRITEMASK_0, &rel_param);
1281                     if (reg->idx) sprintf(register_name, "%cC[%s + %u]", prefix, rel_param.param_str, reg->idx);
1282                     else sprintf(register_name, "%cC[%s]", prefix, rel_param.param_str);
1283                 }
1284                 else
1285                 {
1286                     if (shader_constant_is_local(This, reg->idx))
1287                         sprintf(register_name, "%cLC%u", prefix, reg->idx);
1288                     else
1289                         sprintf(register_name, "%cC[%u]", prefix, reg->idx);
1290                 }
1291             }
1292             break;
1293
1294         case WINED3DSPR_CONSTINT:
1295             if (pshader) sprintf(register_name, "PI[%u]", reg->idx);
1296             else sprintf(register_name, "VI[%u]", reg->idx);
1297             break;
1298
1299         case WINED3DSPR_CONSTBOOL:
1300             if (pshader) sprintf(register_name, "PB[%u]", reg->idx);
1301             else sprintf(register_name, "VB[%u]", reg->idx);
1302             break;
1303
1304         case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
1305             if (pshader) sprintf(register_name, "T%u", reg->idx);
1306             else sprintf(register_name, "A%u", reg->idx);
1307             break;
1308
1309         case WINED3DSPR_LOOP:
1310             sprintf(register_name, "aL%u", This->baseShader.cur_loop_regno - 1);
1311             break;
1312
1313         case WINED3DSPR_SAMPLER:
1314             if (pshader) sprintf(register_name, "Psampler%u", reg->idx);
1315             else sprintf(register_name, "Vsampler%u", reg->idx);
1316             break;
1317
1318         case WINED3DSPR_COLOROUT:
1319             if (reg->idx >= gl_info->limits.buffers)
1320                 WARN("Write to render target %u, only %d supported.\n", reg->idx, gl_info->limits.buffers);
1321
1322             sprintf(register_name, "gl_FragData[%u]", reg->idx);
1323             break;
1324
1325         case WINED3DSPR_RASTOUT:
1326             sprintf(register_name, "%s", hwrastout_reg_names[reg->idx]);
1327             break;
1328
1329         case WINED3DSPR_DEPTHOUT:
1330             sprintf(register_name, "gl_FragDepth");
1331             break;
1332
1333         case WINED3DSPR_ATTROUT:
1334             if (reg->idx == 0) sprintf(register_name, "gl_FrontColor");
1335             else sprintf(register_name, "gl_FrontSecondaryColor");
1336             break;
1337
1338         case WINED3DSPR_TEXCRDOUT:
1339             /* Vertex shaders >= 3.0: WINED3DSPR_OUTPUT */
1340             if (This->baseShader.reg_maps.shader_version.major >= 3) sprintf(register_name, "OUT[%u]", reg->idx);
1341             else sprintf(register_name, "gl_TexCoord[%u]", reg->idx);
1342             break;
1343
1344         case WINED3DSPR_MISCTYPE:
1345             if (reg->idx == 0)
1346             {
1347                 /* vPos */
1348                 sprintf(register_name, "vpos");
1349             }
1350             else if (reg->idx == 1)
1351             {
1352                 /* Note that gl_FrontFacing is a bool, while vFace is
1353                  * a float for which the sign determines front/back */
1354                 sprintf(register_name, "(gl_FrontFacing ? 1.0 : -1.0)");
1355             }
1356             else
1357             {
1358                 FIXME("Unhandled misctype register %d\n", reg->idx);
1359                 sprintf(register_name, "unrecognized_register");
1360             }
1361             break;
1362
1363         case WINED3DSPR_IMMCONST:
1364             switch (reg->immconst_type)
1365             {
1366                 case WINED3D_IMMCONST_FLOAT:
1367                     sprintf(register_name, "%.8e", *(const float *)reg->immconst_data);
1368                     break;
1369
1370                 case WINED3D_IMMCONST_FLOAT4:
1371                     sprintf(register_name, "vec4(%.8e, %.8e, %.8e, %.8e)",
1372                             *(const float *)&reg->immconst_data[0], *(const float *)&reg->immconst_data[1],
1373                             *(const float *)&reg->immconst_data[2], *(const float *)&reg->immconst_data[3]);
1374                     break;
1375
1376                 default:
1377                     FIXME("Unhandled immconst type %#x\n", reg->immconst_type);
1378                     sprintf(register_name, "<unhandled_immconst_type %#x>", reg->immconst_type);
1379             }
1380             break;
1381
1382         default:
1383             FIXME("Unhandled register name Type(%d)\n", reg->type);
1384             sprintf(register_name, "unrecognized_register");
1385             break;
1386     }
1387 }
1388
1389 static void shader_glsl_write_mask_to_str(DWORD write_mask, char *str)
1390 {
1391     *str++ = '.';
1392     if (write_mask & WINED3DSP_WRITEMASK_0) *str++ = 'x';
1393     if (write_mask & WINED3DSP_WRITEMASK_1) *str++ = 'y';
1394     if (write_mask & WINED3DSP_WRITEMASK_2) *str++ = 'z';
1395     if (write_mask & WINED3DSP_WRITEMASK_3) *str++ = 'w';
1396     *str = '\0';
1397 }
1398
1399 /* Get the GLSL write mask for the destination register */
1400 static DWORD shader_glsl_get_write_mask(const struct wined3d_shader_dst_param *param, char *write_mask)
1401 {
1402     DWORD mask = param->write_mask;
1403
1404     if (shader_is_scalar(&param->reg))
1405     {
1406         mask = WINED3DSP_WRITEMASK_0;
1407         *write_mask = '\0';
1408     }
1409     else
1410     {
1411         shader_glsl_write_mask_to_str(mask, write_mask);
1412     }
1413
1414     return mask;
1415 }
1416
1417 static unsigned int shader_glsl_get_write_mask_size(DWORD write_mask) {
1418     unsigned int size = 0;
1419
1420     if (write_mask & WINED3DSP_WRITEMASK_0) ++size;
1421     if (write_mask & WINED3DSP_WRITEMASK_1) ++size;
1422     if (write_mask & WINED3DSP_WRITEMASK_2) ++size;
1423     if (write_mask & WINED3DSP_WRITEMASK_3) ++size;
1424
1425     return size;
1426 }
1427
1428 static void shader_glsl_swizzle_to_str(const DWORD swizzle, BOOL fixup, DWORD mask, char *str)
1429 {
1430     /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
1431      * but addressed as "rgba". To fix this we need to swap the register's x
1432      * and z components. */
1433     const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
1434
1435     *str++ = '.';
1436     /* swizzle bits fields: wwzzyyxx */
1437     if (mask & WINED3DSP_WRITEMASK_0) *str++ = swizzle_chars[swizzle & 0x03];
1438     if (mask & WINED3DSP_WRITEMASK_1) *str++ = swizzle_chars[(swizzle >> 2) & 0x03];
1439     if (mask & WINED3DSP_WRITEMASK_2) *str++ = swizzle_chars[(swizzle >> 4) & 0x03];
1440     if (mask & WINED3DSP_WRITEMASK_3) *str++ = swizzle_chars[(swizzle >> 6) & 0x03];
1441     *str = '\0';
1442 }
1443
1444 static void shader_glsl_get_swizzle(const struct wined3d_shader_src_param *param,
1445         BOOL fixup, DWORD mask, char *swizzle_str)
1446 {
1447     if (shader_is_scalar(&param->reg))
1448         *swizzle_str = '\0';
1449     else
1450         shader_glsl_swizzle_to_str(param->swizzle, fixup, mask, swizzle_str);
1451 }
1452
1453 /* From a given parameter token, generate the corresponding GLSL string.
1454  * Also, return the actual register name and swizzle in case the
1455  * caller needs this information as well. */
1456 static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
1457         const struct wined3d_shader_src_param *wined3d_src, DWORD mask, glsl_src_param_t *glsl_src)
1458 {
1459     BOOL is_color = FALSE;
1460     char swizzle_str[6];
1461
1462     glsl_src->reg_name[0] = '\0';
1463     glsl_src->param_str[0] = '\0';
1464     swizzle_str[0] = '\0';
1465
1466     shader_glsl_get_register_name(&wined3d_src->reg, glsl_src->reg_name, &is_color, ins);
1467     shader_glsl_get_swizzle(wined3d_src, is_color, mask, swizzle_str);
1468     shader_glsl_gen_modifier(wined3d_src->modifiers, glsl_src->reg_name, swizzle_str, glsl_src->param_str);
1469 }
1470
1471 /* From a given parameter token, generate the corresponding GLSL string.
1472  * Also, return the actual register name and swizzle in case the
1473  * caller needs this information as well. */
1474 static DWORD shader_glsl_add_dst_param(const struct wined3d_shader_instruction *ins,
1475         const struct wined3d_shader_dst_param *wined3d_dst, glsl_dst_param_t *glsl_dst)
1476 {
1477     BOOL is_color = FALSE;
1478
1479     glsl_dst->mask_str[0] = '\0';
1480     glsl_dst->reg_name[0] = '\0';
1481
1482     shader_glsl_get_register_name(&wined3d_dst->reg, glsl_dst->reg_name, &is_color, ins);
1483     return shader_glsl_get_write_mask(wined3d_dst, glsl_dst->mask_str);
1484 }
1485
1486 /* Append the destination part of the instruction to the buffer, return the effective write mask */
1487 static DWORD shader_glsl_append_dst_ext(struct wined3d_shader_buffer *buffer,
1488         const struct wined3d_shader_instruction *ins, const struct wined3d_shader_dst_param *dst)
1489 {
1490     glsl_dst_param_t glsl_dst;
1491     DWORD mask;
1492
1493     mask = shader_glsl_add_dst_param(ins, dst, &glsl_dst);
1494     if (mask) shader_addline(buffer, "%s%s = %s(", glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
1495
1496     return mask;
1497 }
1498
1499 /* Append the destination part of the instruction to the buffer, return the effective write mask */
1500 static DWORD shader_glsl_append_dst(struct wined3d_shader_buffer *buffer, const struct wined3d_shader_instruction *ins)
1501 {
1502     return shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0]);
1503 }
1504
1505 /** Process GLSL instruction modifiers */
1506 static void shader_glsl_add_instruction_modifiers(const struct wined3d_shader_instruction *ins)
1507 {
1508     glsl_dst_param_t dst_param;
1509     DWORD modifiers;
1510
1511     if (!ins->dst_count) return;
1512
1513     modifiers = ins->dst[0].modifiers;
1514     if (!modifiers) return;
1515
1516     shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
1517
1518     if (modifiers & WINED3DSPDM_SATURATE)
1519     {
1520         /* _SAT means to clamp the value of the register to between 0 and 1 */
1521         shader_addline(ins->ctx->buffer, "%s%s = clamp(%s%s, 0.0, 1.0);\n", dst_param.reg_name,
1522                 dst_param.mask_str, dst_param.reg_name, dst_param.mask_str);
1523     }
1524
1525     if (modifiers & WINED3DSPDM_MSAMPCENTROID)
1526     {
1527         FIXME("_centroid modifier not handled\n");
1528     }
1529
1530     if (modifiers & WINED3DSPDM_PARTIALPRECISION)
1531     {
1532         /* MSDN says this modifier can be safely ignored, so that's what we'll do. */
1533     }
1534 }
1535
1536 static inline const char *shader_get_comp_op(DWORD op)
1537 {
1538     switch (op) {
1539         case COMPARISON_GT: return ">";
1540         case COMPARISON_EQ: return "==";
1541         case COMPARISON_GE: return ">=";
1542         case COMPARISON_LT: return "<";
1543         case COMPARISON_NE: return "!=";
1544         case COMPARISON_LE: return "<=";
1545         default:
1546             FIXME("Unrecognized comparison value: %u\n", op);
1547             return "(\?\?)";
1548     }
1549 }
1550
1551 static void shader_glsl_get_sample_function(DWORD sampler_type, DWORD flags, glsl_sample_function_t *sample_function)
1552 {
1553     BOOL projected = flags & WINED3D_GLSL_SAMPLE_PROJECTED;
1554     BOOL texrect = flags & WINED3D_GLSL_SAMPLE_RECT;
1555     BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD;
1556     BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD;
1557
1558     /* Note that there's no such thing as a projected cube texture. */
1559     switch(sampler_type) {
1560         case WINED3DSTT_1D:
1561             if(lod) {
1562                 sample_function->name = projected ? "texture1DProjLod" : "texture1DLod";
1563             } else  if(grad) {
1564                 sample_function->name = projected ? "texture1DProjGradARB" : "texture1DGradARB";
1565             } else {
1566                 sample_function->name = projected ? "texture1DProj" : "texture1D";
1567             }
1568             sample_function->coord_mask = WINED3DSP_WRITEMASK_0;
1569             break;
1570         case WINED3DSTT_2D:
1571             if(texrect) {
1572                 if(lod) {
1573                     sample_function->name = projected ? "texture2DRectProjLod" : "texture2DRectLod";
1574                 } else  if(grad) {
1575                     /* What good are texrect grad functions? I don't know, but GL_EXT_gpu_shader4 defines them.
1576                     * There is no GL_ARB_shader_texture_lod spec yet, so I don't know if they're defined there
1577                      */
1578                     sample_function->name = projected ? "shadow2DRectProjGradARB" : "shadow2DRectGradARB";
1579                 } else {
1580                     sample_function->name = projected ? "texture2DRectProj" : "texture2DRect";
1581                 }
1582             } else {
1583                 if(lod) {
1584                     sample_function->name = projected ? "texture2DProjLod" : "texture2DLod";
1585                 } else  if(grad) {
1586                     sample_function->name = projected ? "texture2DProjGradARB" : "texture2DGradARB";
1587                 } else {
1588                     sample_function->name = projected ? "texture2DProj" : "texture2D";
1589                 }
1590             }
1591             sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1;
1592             break;
1593         case WINED3DSTT_CUBE:
1594             if(lod) {
1595                 sample_function->name = "textureCubeLod";
1596             } else if(grad) {
1597                 sample_function->name = "textureCubeGradARB";
1598             } else {
1599                 sample_function->name = "textureCube";
1600             }
1601             sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1602             break;
1603         case WINED3DSTT_VOLUME:
1604             if(lod) {
1605                 sample_function->name = projected ? "texture3DProjLod" : "texture3DLod";
1606             } else  if(grad) {
1607                 sample_function->name = projected ? "texture3DProjGradARB" : "texture3DGradARB";
1608             } else {
1609                 sample_function->name = projected ? "texture3DProj" : "texture3D";
1610             }
1611             sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1612             break;
1613         default:
1614             sample_function->name = "";
1615             sample_function->coord_mask = 0;
1616             FIXME("Unrecognized sampler type: %#x;\n", sampler_type);
1617             break;
1618     }
1619 }
1620
1621 static void shader_glsl_append_fixup_arg(char *arguments, const char *reg_name,
1622         BOOL sign_fixup, enum fixup_channel_source channel_source)
1623 {
1624     switch(channel_source)
1625     {
1626         case CHANNEL_SOURCE_ZERO:
1627             strcat(arguments, "0.0");
1628             break;
1629
1630         case CHANNEL_SOURCE_ONE:
1631             strcat(arguments, "1.0");
1632             break;
1633
1634         case CHANNEL_SOURCE_X:
1635             strcat(arguments, reg_name);
1636             strcat(arguments, ".x");
1637             break;
1638
1639         case CHANNEL_SOURCE_Y:
1640             strcat(arguments, reg_name);
1641             strcat(arguments, ".y");
1642             break;
1643
1644         case CHANNEL_SOURCE_Z:
1645             strcat(arguments, reg_name);
1646             strcat(arguments, ".z");
1647             break;
1648
1649         case CHANNEL_SOURCE_W:
1650             strcat(arguments, reg_name);
1651             strcat(arguments, ".w");
1652             break;
1653
1654         default:
1655             FIXME("Unhandled channel source %#x\n", channel_source);
1656             strcat(arguments, "undefined");
1657             break;
1658     }
1659
1660     if (sign_fixup) strcat(arguments, " * 2.0 - 1.0");
1661 }
1662
1663 static void shader_glsl_color_correction(const struct wined3d_shader_instruction *ins, struct color_fixup_desc fixup)
1664 {
1665     struct wined3d_shader_dst_param dst;
1666     unsigned int mask_size, remaining;
1667     glsl_dst_param_t dst_param;
1668     char arguments[256];
1669     DWORD mask;
1670
1671     mask = 0;
1672     if (fixup.x_sign_fixup || fixup.x_source != CHANNEL_SOURCE_X) mask |= WINED3DSP_WRITEMASK_0;
1673     if (fixup.y_sign_fixup || fixup.y_source != CHANNEL_SOURCE_Y) mask |= WINED3DSP_WRITEMASK_1;
1674     if (fixup.z_sign_fixup || fixup.z_source != CHANNEL_SOURCE_Z) mask |= WINED3DSP_WRITEMASK_2;
1675     if (fixup.w_sign_fixup || fixup.w_source != CHANNEL_SOURCE_W) mask |= WINED3DSP_WRITEMASK_3;
1676     mask &= ins->dst[0].write_mask;
1677
1678     if (!mask) return; /* Nothing to do */
1679
1680     if (is_yuv_fixup(fixup))
1681     {
1682         enum yuv_fixup yuv_fixup = get_yuv_fixup(fixup);
1683         FIXME("YUV fixup (%#x) not supported\n", yuv_fixup);
1684         return;
1685     }
1686
1687     mask_size = shader_glsl_get_write_mask_size(mask);
1688
1689     dst = ins->dst[0];
1690     dst.write_mask = mask;
1691     shader_glsl_add_dst_param(ins, &dst, &dst_param);
1692
1693     arguments[0] = '\0';
1694     remaining = mask_size;
1695     if (mask & WINED3DSP_WRITEMASK_0)
1696     {
1697         shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.x_sign_fixup, fixup.x_source);
1698         if (--remaining) strcat(arguments, ", ");
1699     }
1700     if (mask & WINED3DSP_WRITEMASK_1)
1701     {
1702         shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.y_sign_fixup, fixup.y_source);
1703         if (--remaining) strcat(arguments, ", ");
1704     }
1705     if (mask & WINED3DSP_WRITEMASK_2)
1706     {
1707         shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.z_sign_fixup, fixup.z_source);
1708         if (--remaining) strcat(arguments, ", ");
1709     }
1710     if (mask & WINED3DSP_WRITEMASK_3)
1711     {
1712         shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.w_sign_fixup, fixup.w_source);
1713         if (--remaining) strcat(arguments, ", ");
1714     }
1715
1716     if (mask_size > 1)
1717     {
1718         shader_addline(ins->ctx->buffer, "%s%s = vec%u(%s);\n",
1719                 dst_param.reg_name, dst_param.mask_str, mask_size, arguments);
1720     }
1721     else
1722     {
1723         shader_addline(ins->ctx->buffer, "%s%s = %s;\n", dst_param.reg_name, dst_param.mask_str, arguments);
1724     }
1725 }
1726
1727 static void PRINTF_ATTR(8, 9) shader_glsl_gen_sample_code(const struct wined3d_shader_instruction *ins,
1728         DWORD sampler, const glsl_sample_function_t *sample_function, DWORD swizzle,
1729         const char *dx, const char *dy,
1730         const char *bias, const char *coord_reg_fmt, ...)
1731 {
1732     const char *sampler_base;
1733     char dst_swizzle[6];
1734     struct color_fixup_desc fixup;
1735     BOOL np2_fixup = FALSE;
1736     va_list args;
1737
1738     shader_glsl_swizzle_to_str(swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);
1739
1740     if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
1741     {
1742         const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
1743         fixup = priv->cur_ps_args->color_fixup[sampler];
1744         sampler_base = "Psampler";
1745
1746         if(priv->cur_ps_args->np2_fixup & (1 << sampler)) {
1747             if(bias) {
1748                 FIXME("Biased sampling from NP2 textures is unsupported\n");
1749             } else {
1750                 np2_fixup = TRUE;
1751             }
1752         }
1753     } else {
1754         sampler_base = "Vsampler";
1755         fixup = COLOR_FIXUP_IDENTITY; /* FIXME: Vshader color fixup */
1756     }
1757
1758     shader_glsl_append_dst(ins->ctx->buffer, ins);
1759
1760     shader_addline(ins->ctx->buffer, "%s(%s%u, ", sample_function->name, sampler_base, sampler);
1761
1762     va_start(args, coord_reg_fmt);
1763     shader_vaddline(ins->ctx->buffer, coord_reg_fmt, args);
1764     va_end(args);
1765
1766     if(bias) {
1767         shader_addline(ins->ctx->buffer, ", %s)%s);\n", bias, dst_swizzle);
1768     } else {
1769         if (np2_fixup) {
1770             const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
1771             const unsigned char idx = priv->cur_np2fixup_info->idx[sampler];
1772
1773             shader_addline(ins->ctx->buffer, " * PsamplerNP2Fixup[%u].%s)%s);\n", idx >> 1,
1774                            (idx % 2) ? "zw" : "xy", dst_swizzle);
1775         } else if(dx && dy) {
1776             shader_addline(ins->ctx->buffer, ", %s, %s)%s);\n", dx, dy, dst_swizzle);
1777         } else {
1778             shader_addline(ins->ctx->buffer, ")%s);\n", dst_swizzle);
1779         }
1780     }
1781
1782     if(!is_identity_fixup(fixup)) {
1783         shader_glsl_color_correction(ins, fixup);
1784     }
1785 }
1786
1787 /*****************************************************************************
1788  * Begin processing individual instruction opcodes
1789  ****************************************************************************/
1790
1791 /* Generate GLSL arithmetic functions (dst = src1 + src2) */
1792 static void shader_glsl_arith(const struct wined3d_shader_instruction *ins)
1793 {
1794     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
1795     glsl_src_param_t src0_param;
1796     glsl_src_param_t src1_param;
1797     DWORD write_mask;
1798     char op;
1799
1800     /* Determine the GLSL operator to use based on the opcode */
1801     switch (ins->handler_idx)
1802     {
1803         case WINED3DSIH_MUL: op = '*'; break;
1804         case WINED3DSIH_ADD: op = '+'; break;
1805         case WINED3DSIH_SUB: op = '-'; break;
1806         default:
1807             op = ' ';
1808             FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx);
1809             break;
1810     }
1811
1812     write_mask = shader_glsl_append_dst(buffer, ins);
1813     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
1814     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
1815     shader_addline(buffer, "%s %c %s);\n", src0_param.param_str, op, src1_param.param_str);
1816 }
1817
1818 /* Process the WINED3DSIO_MOV opcode using GLSL (dst = src) */
1819 static void shader_glsl_mov(const struct wined3d_shader_instruction *ins)
1820 {
1821     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
1822     glsl_src_param_t src0_param;
1823     DWORD write_mask;
1824
1825     write_mask = shader_glsl_append_dst(buffer, ins);
1826     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
1827
1828     /* In vs_1_1 WINED3DSIO_MOV can write to the address register. In later
1829      * shader versions WINED3DSIO_MOVA is used for this. */
1830     if (ins->ctx->reg_maps->shader_version.major == 1
1831             && !shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)
1832             && ins->dst[0].reg.type == WINED3DSPR_ADDR)
1833     {
1834         /* This is a simple floor() */
1835         unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
1836         if (mask_size > 1) {
1837             shader_addline(buffer, "ivec%d(floor(%s)));\n", mask_size, src0_param.param_str);
1838         } else {
1839             shader_addline(buffer, "int(floor(%s)));\n", src0_param.param_str);
1840         }
1841     }
1842     else if(ins->handler_idx == WINED3DSIH_MOVA)
1843     {
1844         /* We need to *round* to the nearest int here. */
1845         unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
1846         if (mask_size > 1) {
1847             shader_addline(buffer, "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s)));\n", mask_size, src0_param.param_str, mask_size, src0_param.param_str);
1848         } else {
1849             shader_addline(buffer, "int(floor(abs(%s) + 0.5) * sign(%s)));\n", src0_param.param_str, src0_param.param_str);
1850         }
1851     } else {
1852         shader_addline(buffer, "%s);\n", src0_param.param_str);
1853     }
1854 }
1855
1856 /* Process the dot product operators DP3 and DP4 in GLSL (dst = dot(src0, src1)) */
1857 static void shader_glsl_dot(const struct wined3d_shader_instruction *ins)
1858 {
1859     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
1860     glsl_src_param_t src0_param;
1861     glsl_src_param_t src1_param;
1862     DWORD dst_write_mask, src_write_mask;
1863     unsigned int dst_size = 0;
1864
1865     dst_write_mask = shader_glsl_append_dst(buffer, ins);
1866     dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
1867
1868     /* dp3 works on vec3, dp4 on vec4 */
1869     if (ins->handler_idx == WINED3DSIH_DP4)
1870     {
1871         src_write_mask = WINED3DSP_WRITEMASK_ALL;
1872     } else {
1873         src_write_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1874     }
1875
1876     shader_glsl_add_src_param(ins, &ins->src[0], src_write_mask, &src0_param);
1877     shader_glsl_add_src_param(ins, &ins->src[1], src_write_mask, &src1_param);
1878
1879     if (dst_size > 1) {
1880         shader_addline(buffer, "vec%d(dot(%s, %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
1881     } else {
1882         shader_addline(buffer, "dot(%s, %s));\n", src0_param.param_str, src1_param.param_str);
1883     }
1884 }
1885
1886 /* Note that this instruction has some restrictions. The destination write mask
1887  * can't contain the w component, and the source swizzles have to be .xyzw */
1888 static void shader_glsl_cross(const struct wined3d_shader_instruction *ins)
1889 {
1890     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1891     glsl_src_param_t src0_param;
1892     glsl_src_param_t src1_param;
1893     char dst_mask[6];
1894
1895     shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
1896     shader_glsl_append_dst(ins->ctx->buffer, ins);
1897     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
1898     shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
1899     shader_addline(ins->ctx->buffer, "cross(%s, %s)%s);\n", src0_param.param_str, src1_param.param_str, dst_mask);
1900 }
1901
1902 /* Process the WINED3DSIO_POW instruction in GLSL (dst = |src0|^src1)
1903  * Src0 and src1 are scalars. Note that D3D uses the absolute of src0, while
1904  * GLSL uses the value as-is. */
1905 static void shader_glsl_pow(const struct wined3d_shader_instruction *ins)
1906 {
1907     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
1908     glsl_src_param_t src0_param;
1909     glsl_src_param_t src1_param;
1910     DWORD dst_write_mask;
1911     unsigned int dst_size;
1912
1913     dst_write_mask = shader_glsl_append_dst(buffer, ins);
1914     dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
1915
1916     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
1917     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
1918
1919     if (dst_size > 1) {
1920         shader_addline(buffer, "vec%d(pow(abs(%s), %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
1921     } else {
1922         shader_addline(buffer, "pow(abs(%s), %s));\n", src0_param.param_str, src1_param.param_str);
1923     }
1924 }
1925
1926 /* Process the WINED3DSIO_LOG instruction in GLSL (dst = log2(|src0|))
1927  * Src0 is a scalar. Note that D3D uses the absolute of src0, while
1928  * GLSL uses the value as-is. */
1929 static void shader_glsl_log(const struct wined3d_shader_instruction *ins)
1930 {
1931     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
1932     glsl_src_param_t src0_param;
1933     DWORD dst_write_mask;
1934     unsigned int dst_size;
1935
1936     dst_write_mask = shader_glsl_append_dst(buffer, ins);
1937     dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
1938
1939     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
1940
1941     if (dst_size > 1) {
1942         shader_addline(buffer, "vec%d(log2(abs(%s))));\n", dst_size, src0_param.param_str);
1943     } else {
1944         shader_addline(buffer, "log2(abs(%s)));\n", src0_param.param_str);
1945     }
1946 }
1947
1948 /* Map the opcode 1-to-1 to the GL code (arg->dst = instruction(src0, src1, ...) */
1949 static void shader_glsl_map2gl(const struct wined3d_shader_instruction *ins)
1950 {
1951     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
1952     glsl_src_param_t src_param;
1953     const char *instruction;
1954     DWORD write_mask;
1955     unsigned i;
1956
1957     /* Determine the GLSL function to use based on the opcode */
1958     /* TODO: Possibly make this a table for faster lookups */
1959     switch (ins->handler_idx)
1960     {
1961         case WINED3DSIH_MIN: instruction = "min"; break;
1962         case WINED3DSIH_MAX: instruction = "max"; break;
1963         case WINED3DSIH_ABS: instruction = "abs"; break;
1964         case WINED3DSIH_FRC: instruction = "fract"; break;
1965         case WINED3DSIH_NRM: instruction = "normalize"; break;
1966         case WINED3DSIH_EXP: instruction = "exp2"; break;
1967         case WINED3DSIH_DSX: instruction = "dFdx"; break;
1968         case WINED3DSIH_DSY: instruction = "ycorrection.y * dFdy"; break;
1969         default: instruction = "";
1970             FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx);
1971             break;
1972     }
1973
1974     write_mask = shader_glsl_append_dst(buffer, ins);
1975
1976     shader_addline(buffer, "%s(", instruction);
1977
1978     if (ins->src_count)
1979     {
1980         shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
1981         shader_addline(buffer, "%s", src_param.param_str);
1982         for (i = 1; i < ins->src_count; ++i)
1983         {
1984             shader_glsl_add_src_param(ins, &ins->src[i], write_mask, &src_param);
1985             shader_addline(buffer, ", %s", src_param.param_str);
1986         }
1987     }
1988
1989     shader_addline(buffer, "));\n");
1990 }
1991
1992 /** Process the WINED3DSIO_EXPP instruction in GLSL:
1993  * For shader model 1.x, do the following (and honor the writemask, so use a temporary variable):
1994  *   dst.x = 2^(floor(src))
1995  *   dst.y = src - floor(src)
1996  *   dst.z = 2^src   (partial precision is allowed, but optional)
1997  *   dst.w = 1.0;
1998  * For 2.0 shaders, just do this (honoring writemask and swizzle):
1999  *   dst = 2^src;    (partial precision is allowed, but optional)
2000  */
2001 static void shader_glsl_expp(const struct wined3d_shader_instruction *ins)
2002 {
2003     glsl_src_param_t src_param;
2004
2005     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src_param);
2006
2007     if (ins->ctx->reg_maps->shader_version.major < 2)
2008     {
2009         char dst_mask[6];
2010
2011         shader_addline(ins->ctx->buffer, "tmp0.x = exp2(floor(%s));\n", src_param.param_str);
2012         shader_addline(ins->ctx->buffer, "tmp0.y = %s - floor(%s);\n", src_param.param_str, src_param.param_str);
2013         shader_addline(ins->ctx->buffer, "tmp0.z = exp2(%s);\n", src_param.param_str);
2014         shader_addline(ins->ctx->buffer, "tmp0.w = 1.0;\n");
2015
2016         shader_glsl_append_dst(ins->ctx->buffer, ins);
2017         shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2018         shader_addline(ins->ctx->buffer, "tmp0%s);\n", dst_mask);
2019     } else {
2020         DWORD write_mask;
2021         unsigned int mask_size;
2022
2023         write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2024         mask_size = shader_glsl_get_write_mask_size(write_mask);
2025
2026         if (mask_size > 1) {
2027             shader_addline(ins->ctx->buffer, "vec%d(exp2(%s)));\n", mask_size, src_param.param_str);
2028         } else {
2029             shader_addline(ins->ctx->buffer, "exp2(%s));\n", src_param.param_str);
2030         }
2031     }
2032 }
2033
2034 /** Process the RCP (reciprocal or inverse) opcode in GLSL (dst = 1 / src) */
2035 static void shader_glsl_rcp(const struct wined3d_shader_instruction *ins)
2036 {
2037     glsl_src_param_t src_param;
2038     DWORD write_mask;
2039     unsigned int mask_size;
2040
2041     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2042     mask_size = shader_glsl_get_write_mask_size(write_mask);
2043     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param);
2044
2045     if (mask_size > 1) {
2046         shader_addline(ins->ctx->buffer, "vec%d(1.0 / %s));\n", mask_size, src_param.param_str);
2047     } else {
2048         shader_addline(ins->ctx->buffer, "1.0 / %s);\n", src_param.param_str);
2049     }
2050 }
2051
2052 static void shader_glsl_rsq(const struct wined3d_shader_instruction *ins)
2053 {
2054     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2055     glsl_src_param_t src_param;
2056     DWORD write_mask;
2057     unsigned int mask_size;
2058
2059     write_mask = shader_glsl_append_dst(buffer, ins);
2060     mask_size = shader_glsl_get_write_mask_size(write_mask);
2061
2062     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param);
2063
2064     if (mask_size > 1) {
2065         shader_addline(buffer, "vec%d(inversesqrt(%s)));\n", mask_size, src_param.param_str);
2066     } else {
2067         shader_addline(buffer, "inversesqrt(%s));\n", src_param.param_str);
2068     }
2069 }
2070
2071 /** Process signed comparison opcodes in GLSL. */
2072 static void shader_glsl_compare(const struct wined3d_shader_instruction *ins)
2073 {
2074     glsl_src_param_t src0_param;
2075     glsl_src_param_t src1_param;
2076     DWORD write_mask;
2077     unsigned int mask_size;
2078
2079     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2080     mask_size = shader_glsl_get_write_mask_size(write_mask);
2081     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2082     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2083
2084     if (mask_size > 1) {
2085         const char *compare;
2086
2087         switch(ins->handler_idx)
2088         {
2089             case WINED3DSIH_SLT: compare = "lessThan"; break;
2090             case WINED3DSIH_SGE: compare = "greaterThanEqual"; break;
2091             default: compare = "";
2092                 FIXME("Can't handle opcode %#x\n", ins->handler_idx);
2093         }
2094
2095         shader_addline(ins->ctx->buffer, "vec%d(%s(%s, %s)));\n", mask_size, compare,
2096                 src0_param.param_str, src1_param.param_str);
2097     } else {
2098         switch(ins->handler_idx)
2099         {
2100             case WINED3DSIH_SLT:
2101                 /* Step(src0, src1) is not suitable here because if src0 == src1 SLT is supposed,
2102                  * to return 0.0 but step returns 1.0 because step is not < x
2103                  * An alternative is a bvec compare padded with an unused second component.
2104                  * step(src1 * -1.0, src0 * -1.0) is not an option because it suffers from the same
2105                  * issue. Playing with not() is not possible either because not() does not accept
2106                  * a scalar.
2107                  */
2108                 shader_addline(ins->ctx->buffer, "(%s < %s) ? 1.0 : 0.0);\n",
2109                         src0_param.param_str, src1_param.param_str);
2110                 break;
2111             case WINED3DSIH_SGE:
2112                 /* Here we can use the step() function and safe a conditional */
2113                 shader_addline(ins->ctx->buffer, "step(%s, %s));\n", src1_param.param_str, src0_param.param_str);
2114                 break;
2115             default:
2116                 FIXME("Can't handle opcode %#x\n", ins->handler_idx);
2117         }
2118
2119     }
2120 }
2121
2122 /** Process CMP instruction in GLSL (dst = src0 >= 0.0 ? src1 : src2), per channel */
2123 static void shader_glsl_cmp(const struct wined3d_shader_instruction *ins)
2124 {
2125     glsl_src_param_t src0_param;
2126     glsl_src_param_t src1_param;
2127     glsl_src_param_t src2_param;
2128     DWORD write_mask, cmp_channel = 0;
2129     unsigned int i, j;
2130     char mask_char[6];
2131     BOOL temp_destination = FALSE;
2132
2133     if (shader_is_scalar(&ins->src[0].reg))
2134     {
2135         write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2136
2137         shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
2138         shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2139         shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2140
2141         shader_addline(ins->ctx->buffer, "%s >= 0.0 ? %s : %s);\n",
2142                        src0_param.param_str, src1_param.param_str, src2_param.param_str);
2143     } else {
2144         DWORD dst_mask = ins->dst[0].write_mask;
2145         struct wined3d_shader_dst_param dst = ins->dst[0];
2146
2147         /* Cycle through all source0 channels */
2148         for (i=0; i<4; i++) {
2149             write_mask = 0;
2150             /* Find the destination channels which use the current source0 channel */
2151             for (j=0; j<4; j++) {
2152                 if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i)
2153                 {
2154                     write_mask |= WINED3DSP_WRITEMASK_0 << j;
2155                     cmp_channel = WINED3DSP_WRITEMASK_0 << j;
2156                 }
2157             }
2158             dst.write_mask = dst_mask & write_mask;
2159
2160             /* Splitting the cmp instruction up in multiple lines imposes a problem:
2161             * The first lines may overwrite source parameters of the following lines.
2162             * Deal with that by using a temporary destination register if needed
2163             */
2164             if ((ins->src[0].reg.idx == ins->dst[0].reg.idx
2165                     && ins->src[0].reg.type == ins->dst[0].reg.type)
2166                     || (ins->src[1].reg.idx == ins->dst[0].reg.idx
2167                     && ins->src[1].reg.type == ins->dst[0].reg.type)
2168                     || (ins->src[2].reg.idx == ins->dst[0].reg.idx
2169                     && ins->src[2].reg.type == ins->dst[0].reg.type))
2170             {
2171                 write_mask = shader_glsl_get_write_mask(&dst, mask_char);
2172                 if (!write_mask) continue;
2173                 shader_addline(ins->ctx->buffer, "tmp0%s = (", mask_char);
2174                 temp_destination = TRUE;
2175             } else {
2176                 write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst);
2177                 if (!write_mask) continue;
2178             }
2179
2180             shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param);
2181             shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2182             shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2183
2184             shader_addline(ins->ctx->buffer, "%s >= 0.0 ? %s : %s);\n",
2185                         src0_param.param_str, src1_param.param_str, src2_param.param_str);
2186         }
2187
2188         if(temp_destination) {
2189             shader_glsl_get_write_mask(&ins->dst[0], mask_char);
2190             shader_glsl_append_dst(ins->ctx->buffer, ins);
2191             shader_addline(ins->ctx->buffer, "tmp0%s);\n", mask_char);
2192         }
2193     }
2194
2195 }
2196
2197 /** Process the CND opcode in GLSL (dst = (src0 > 0.5) ? src1 : src2) */
2198 /* For ps 1.1-1.3, only a single component of src0 is used. For ps 1.4
2199  * the compare is done per component of src0. */
2200 static void shader_glsl_cnd(const struct wined3d_shader_instruction *ins)
2201 {
2202     struct wined3d_shader_dst_param dst;
2203     glsl_src_param_t src0_param;
2204     glsl_src_param_t src1_param;
2205     glsl_src_param_t src2_param;
2206     DWORD write_mask, cmp_channel = 0;
2207     unsigned int i, j;
2208     DWORD dst_mask;
2209     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
2210             ins->ctx->reg_maps->shader_version.minor);
2211
2212     if (shader_version < WINED3D_SHADER_VERSION(1, 4))
2213     {
2214         write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2215         shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2216         shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2217         shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2218
2219         /* Fun: The D3DSI_COISSUE flag changes the semantic of the cnd instruction for < 1.4 shaders */
2220         if (ins->coissue)
2221         {
2222             shader_addline(ins->ctx->buffer, "%s /* COISSUE! */);\n", src1_param.param_str);
2223         } else {
2224             shader_addline(ins->ctx->buffer, "%s > 0.5 ? %s : %s);\n",
2225                     src0_param.param_str, src1_param.param_str, src2_param.param_str);
2226         }
2227         return;
2228     }
2229     /* Cycle through all source0 channels */
2230     dst_mask = ins->dst[0].write_mask;
2231     dst = ins->dst[0];
2232     for (i=0; i<4; i++) {
2233         write_mask = 0;
2234         /* Find the destination channels which use the current source0 channel */
2235         for (j=0; j<4; j++) {
2236             if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i)
2237             {
2238                 write_mask |= WINED3DSP_WRITEMASK_0 << j;
2239                 cmp_channel = WINED3DSP_WRITEMASK_0 << j;
2240             }
2241         }
2242
2243         dst.write_mask = dst_mask & write_mask;
2244         write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst);
2245         if (!write_mask) continue;
2246
2247         shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param);
2248         shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2249         shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2250
2251         shader_addline(ins->ctx->buffer, "%s > 0.5 ? %s : %s);\n",
2252                 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2253     }
2254 }
2255
2256 /** GLSL code generation for WINED3DSIO_MAD: Multiply the first 2 opcodes, then add the last */
2257 static void shader_glsl_mad(const struct wined3d_shader_instruction *ins)
2258 {
2259     glsl_src_param_t src0_param;
2260     glsl_src_param_t src1_param;
2261     glsl_src_param_t src2_param;
2262     DWORD write_mask;
2263
2264     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2265     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2266     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2267     shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2268     shader_addline(ins->ctx->buffer, "(%s * %s) + %s);\n",
2269             src0_param.param_str, src1_param.param_str, src2_param.param_str);
2270 }
2271
2272 /* Handles transforming all WINED3DSIO_M?x? opcodes for
2273    Vertex shaders to GLSL codes */
2274 static void shader_glsl_mnxn(const struct wined3d_shader_instruction *ins)
2275 {
2276     int i;
2277     int nComponents = 0;
2278     struct wined3d_shader_dst_param tmp_dst = {{0}};
2279     struct wined3d_shader_src_param tmp_src[2] = {{{0}}};
2280     struct wined3d_shader_instruction tmp_ins;
2281
2282     memset(&tmp_ins, 0, sizeof(tmp_ins));
2283
2284     /* Set constants for the temporary argument */
2285     tmp_ins.ctx = ins->ctx;
2286     tmp_ins.dst_count = 1;
2287     tmp_ins.dst = &tmp_dst;
2288     tmp_ins.src_count = 2;
2289     tmp_ins.src = tmp_src;
2290
2291     switch(ins->handler_idx)
2292     {
2293         case WINED3DSIH_M4x4:
2294             nComponents = 4;
2295             tmp_ins.handler_idx = WINED3DSIH_DP4;
2296             break;
2297         case WINED3DSIH_M4x3:
2298             nComponents = 3;
2299             tmp_ins.handler_idx = WINED3DSIH_DP4;
2300             break;
2301         case WINED3DSIH_M3x4:
2302             nComponents = 4;
2303             tmp_ins.handler_idx = WINED3DSIH_DP3;
2304             break;
2305         case WINED3DSIH_M3x3:
2306             nComponents = 3;
2307             tmp_ins.handler_idx = WINED3DSIH_DP3;
2308             break;
2309         case WINED3DSIH_M3x2:
2310             nComponents = 2;
2311             tmp_ins.handler_idx = WINED3DSIH_DP3;
2312             break;
2313         default:
2314             break;
2315     }
2316
2317     tmp_dst = ins->dst[0];
2318     tmp_src[0] = ins->src[0];
2319     tmp_src[1] = ins->src[1];
2320     for (i = 0; i < nComponents; ++i)
2321     {
2322         tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i;
2323         shader_glsl_dot(&tmp_ins);
2324         ++tmp_src[1].reg.idx;
2325     }
2326 }
2327
2328 /**
2329     The LRP instruction performs a component-wise linear interpolation
2330     between the second and third operands using the first operand as the
2331     blend factor.  Equation:  (dst = src2 + src0 * (src1 - src2))
2332     This is equivalent to mix(src2, src1, src0);
2333 */
2334 static void shader_glsl_lrp(const struct wined3d_shader_instruction *ins)
2335 {
2336     glsl_src_param_t src0_param;
2337     glsl_src_param_t src1_param;
2338     glsl_src_param_t src2_param;
2339     DWORD write_mask;
2340
2341     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2342
2343     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2344     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2345     shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2346
2347     shader_addline(ins->ctx->buffer, "mix(%s, %s, %s));\n",
2348             src2_param.param_str, src1_param.param_str, src0_param.param_str);
2349 }
2350
2351 /** Process the WINED3DSIO_LIT instruction in GLSL:
2352  * dst.x = dst.w = 1.0
2353  * dst.y = (src0.x > 0) ? src0.x
2354  * dst.z = (src0.x > 0) ? ((src0.y > 0) ? pow(src0.y, src.w) : 0) : 0
2355  *                                        where src.w is clamped at +- 128
2356  */
2357 static void shader_glsl_lit(const struct wined3d_shader_instruction *ins)
2358 {
2359     glsl_src_param_t src0_param;
2360     glsl_src_param_t src1_param;
2361     glsl_src_param_t src3_param;
2362     char dst_mask[6];
2363
2364     shader_glsl_append_dst(ins->ctx->buffer, ins);
2365     shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2366
2367     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2368     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src1_param);
2369     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src3_param);
2370
2371     /* The sdk specifies the instruction like this
2372      * dst.x = 1.0;
2373      * if(src.x > 0.0) dst.y = src.x
2374      * else dst.y = 0.0.
2375      * if(src.x > 0.0 && src.y > 0.0) dst.z = pow(src.y, power);
2376      * else dst.z = 0.0;
2377      * dst.w = 1.0;
2378      *
2379      * Obviously that has quite a few conditionals in it which we don't like. So the first step is this:
2380      * dst.x = 1.0                                  ... No further explanation needed
2381      * dst.y = max(src.y, 0.0);                     ... If x < 0.0, use 0.0, otherwise x. Same as the conditional
2382      * dst.z = x > 0.0 ? pow(max(y, 0.0), p) : 0;   ... 0 ^ power is 0, and otherwise we use y anyway
2383      * dst.w = 1.0.                                 ... Nothing fancy.
2384      *
2385      * So we still have one conditional in there. So do this:
2386      * dst.z = pow(max(0.0, src.y) * step(0.0, src.x), power);
2387      *
2388      * step(0.0, x) will return 1 if src.x > 0.0, and 0 otherwise. So if y is 0 we get pow(0.0 * 1.0, power),
2389      * which sets dst.z to 0. If y > 0, but x = 0.0, we get pow(y * 0.0, power), which results in 0 too.
2390      * if both x and y are > 0, we get pow(y * 1.0, power), as it is supposed to
2391      */
2392     shader_addline(ins->ctx->buffer,
2393             "vec4(1.0, max(%s, 0.0), pow(max(0.0, %s) * step(0.0, %s), clamp(%s, -128.0, 128.0)), 1.0)%s);\n",
2394             src0_param.param_str, src1_param.param_str, src0_param.param_str, src3_param.param_str, dst_mask);
2395 }
2396
2397 /** Process the WINED3DSIO_DST instruction in GLSL:
2398  * dst.x = 1.0
2399  * dst.y = src0.x * src0.y
2400  * dst.z = src0.z
2401  * dst.w = src1.w
2402  */
2403 static void shader_glsl_dst(const struct wined3d_shader_instruction *ins)
2404 {
2405     glsl_src_param_t src0y_param;
2406     glsl_src_param_t src0z_param;
2407     glsl_src_param_t src1y_param;
2408     glsl_src_param_t src1w_param;
2409     char dst_mask[6];
2410
2411     shader_glsl_append_dst(ins->ctx->buffer, ins);
2412     shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2413
2414     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src0y_param);
2415     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &src0z_param);
2416     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_1, &src1y_param);
2417     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_3, &src1w_param);
2418
2419     shader_addline(ins->ctx->buffer, "vec4(1.0, %s * %s, %s, %s))%s;\n",
2420             src0y_param.param_str, src1y_param.param_str, src0z_param.param_str, src1w_param.param_str, dst_mask);
2421 }
2422
2423 /** Process the WINED3DSIO_SINCOS instruction in GLSL:
2424  * VS 2.0 requires that specific cosine and sine constants be passed to this instruction so the hardware
2425  * can handle it.  But, these functions are built-in for GLSL, so we can just ignore the last 2 params.
2426  *
2427  * dst.x = cos(src0.?)
2428  * dst.y = sin(src0.?)
2429  * dst.z = dst.z
2430  * dst.w = dst.w
2431  */
2432 static void shader_glsl_sincos(const struct wined3d_shader_instruction *ins)
2433 {
2434     glsl_src_param_t src0_param;
2435     DWORD write_mask;
2436
2437     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2438     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2439
2440     switch (write_mask) {
2441         case WINED3DSP_WRITEMASK_0:
2442             shader_addline(ins->ctx->buffer, "cos(%s));\n", src0_param.param_str);
2443             break;
2444
2445         case WINED3DSP_WRITEMASK_1:
2446             shader_addline(ins->ctx->buffer, "sin(%s));\n", src0_param.param_str);
2447             break;
2448
2449         case (WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1):
2450             shader_addline(ins->ctx->buffer, "vec2(cos(%s), sin(%s)));\n", src0_param.param_str, src0_param.param_str);
2451             break;
2452
2453         default:
2454             ERR("Write mask should be .x, .y or .xy\n");
2455             break;
2456     }
2457 }
2458
2459 /* sgn in vs_2_0 has 2 extra parameters(registers for temporary storage) which we don't use
2460  * here. But those extra parameters require a dedicated function for sgn, since map2gl would
2461  * generate invalid code
2462  */
2463 static void shader_glsl_sgn(const struct wined3d_shader_instruction *ins)
2464 {
2465     glsl_src_param_t src0_param;
2466     DWORD write_mask;
2467
2468     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2469     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2470
2471     shader_addline(ins->ctx->buffer, "sign(%s));\n", src0_param.param_str);
2472 }
2473
2474 /** Process the WINED3DSIO_LOOP instruction in GLSL:
2475  * Start a for() loop where src1.y is the initial value of aL,
2476  *  increment aL by src1.z for a total of src1.x iterations.
2477  *  Need to use a temporary variable for this operation.
2478  */
2479 /* FIXME: I don't think nested loops will work correctly this way. */
2480 static void shader_glsl_loop(const struct wined3d_shader_instruction *ins)
2481 {
2482     glsl_src_param_t src1_param;
2483     IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
2484     const DWORD *control_values = NULL;
2485     const local_constant *constant;
2486
2487     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_ALL, &src1_param);
2488
2489     /* Try to hardcode the loop control parameters if possible. Direct3D 9 class hardware doesn't support real
2490      * varying indexing, but Microsoft designed this feature for Shader model 2.x+. If the loop control is
2491      * known at compile time, the GLSL compiler can unroll the loop, and replace indirect addressing with direct
2492      * addressing.
2493      */
2494     if (ins->src[1].reg.type == WINED3DSPR_CONSTINT)
2495     {
2496         LIST_FOR_EACH_ENTRY(constant, &shader->baseShader.constantsI, local_constant, entry) {
2497             if (constant->idx == ins->src[1].reg.idx)
2498             {
2499                 control_values = constant->value;
2500                 break;
2501             }
2502         }
2503     }
2504
2505     if (control_values)
2506     {
2507         struct wined3d_shader_loop_control loop_control;
2508         loop_control.count = control_values[0];
2509         loop_control.start = control_values[1];
2510         loop_control.step = (int)control_values[2];
2511
2512         if (loop_control.step > 0)
2513         {
2514             shader_addline(ins->ctx->buffer, "for (aL%u = %u; aL%u < (%u * %d + %u); aL%u += %d) {\n",
2515                     shader->baseShader.cur_loop_depth, loop_control.start,
2516                     shader->baseShader.cur_loop_depth, loop_control.count, loop_control.step, loop_control.start,
2517                     shader->baseShader.cur_loop_depth, loop_control.step);
2518         }
2519         else if (loop_control.step < 0)
2520         {
2521             shader_addline(ins->ctx->buffer, "for (aL%u = %u; aL%u > (%u * %d + %u); aL%u += %d) {\n",
2522                     shader->baseShader.cur_loop_depth, loop_control.start,
2523                     shader->baseShader.cur_loop_depth, loop_control.count, loop_control.step, loop_control.start,
2524                     shader->baseShader.cur_loop_depth, loop_control.step);
2525         }
2526         else
2527         {
2528             shader_addline(ins->ctx->buffer, "for (aL%u = %u, tmpInt%u = 0; tmpInt%u < %u; tmpInt%u++) {\n",
2529                     shader->baseShader.cur_loop_depth, loop_control.start, shader->baseShader.cur_loop_depth,
2530                     shader->baseShader.cur_loop_depth, loop_control.count,
2531                     shader->baseShader.cur_loop_depth);
2532         }
2533     } else {
2534         shader_addline(ins->ctx->buffer,
2535                 "for (tmpInt%u = 0, aL%u = %s.y; tmpInt%u < %s.x; tmpInt%u++, aL%u += %s.z) {\n",
2536                 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_regno,
2537                 src1_param.reg_name, shader->baseShader.cur_loop_depth, src1_param.reg_name,
2538                 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_regno, src1_param.reg_name);
2539     }
2540
2541     shader->baseShader.cur_loop_depth++;
2542     shader->baseShader.cur_loop_regno++;
2543 }
2544
2545 static void shader_glsl_end(const struct wined3d_shader_instruction *ins)
2546 {
2547     IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
2548
2549     shader_addline(ins->ctx->buffer, "}\n");
2550
2551     if (ins->handler_idx == WINED3DSIH_ENDLOOP)
2552     {
2553         shader->baseShader.cur_loop_depth--;
2554         shader->baseShader.cur_loop_regno--;
2555     }
2556
2557     if (ins->handler_idx == WINED3DSIH_ENDREP)
2558     {
2559         shader->baseShader.cur_loop_depth--;
2560     }
2561 }
2562
2563 static void shader_glsl_rep(const struct wined3d_shader_instruction *ins)
2564 {
2565     IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
2566     glsl_src_param_t src0_param;
2567     const DWORD *control_values = NULL;
2568     const local_constant *constant;
2569
2570     /* Try to hardcode local values to help the GLSL compiler to unroll and optimize the loop */
2571     if (ins->src[0].reg.type == WINED3DSPR_CONSTINT)
2572     {
2573         LIST_FOR_EACH_ENTRY(constant, &shader->baseShader.constantsI, local_constant, entry)
2574         {
2575             if (constant->idx == ins->src[0].reg.idx)
2576             {
2577                 control_values = constant->value;
2578                 break;
2579             }
2580         }
2581     }
2582
2583     if(control_values) {
2584         shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %d; tmpInt%d++) {\n",
2585                        shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_depth,
2586                        control_values[0], shader->baseShader.cur_loop_depth);
2587     } else {
2588         shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2589         shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %s; tmpInt%d++) {\n",
2590                 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_depth,
2591                 src0_param.param_str, shader->baseShader.cur_loop_depth);
2592     }
2593     shader->baseShader.cur_loop_depth++;
2594 }
2595
2596 static void shader_glsl_if(const struct wined3d_shader_instruction *ins)
2597 {
2598     glsl_src_param_t src0_param;
2599
2600     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2601     shader_addline(ins->ctx->buffer, "if (%s) {\n", src0_param.param_str);
2602 }
2603
2604 static void shader_glsl_ifc(const struct wined3d_shader_instruction *ins)
2605 {
2606     glsl_src_param_t src0_param;
2607     glsl_src_param_t src1_param;
2608
2609     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2610     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
2611
2612     shader_addline(ins->ctx->buffer, "if (%s %s %s) {\n",
2613             src0_param.param_str, shader_get_comp_op(ins->flags), src1_param.param_str);
2614 }
2615
2616 static void shader_glsl_else(const struct wined3d_shader_instruction *ins)
2617 {
2618     shader_addline(ins->ctx->buffer, "} else {\n");
2619 }
2620
2621 static void shader_glsl_break(const struct wined3d_shader_instruction *ins)
2622 {
2623     shader_addline(ins->ctx->buffer, "break;\n");
2624 }
2625
2626 /* FIXME: According to MSDN the compare is done per component. */
2627 static void shader_glsl_breakc(const struct wined3d_shader_instruction *ins)
2628 {
2629     glsl_src_param_t src0_param;
2630     glsl_src_param_t src1_param;
2631
2632     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2633     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
2634
2635     shader_addline(ins->ctx->buffer, "if (%s %s %s) break;\n",
2636             src0_param.param_str, shader_get_comp_op(ins->flags), src1_param.param_str);
2637 }
2638
2639 static void shader_glsl_label(const struct wined3d_shader_instruction *ins)
2640 {
2641     shader_addline(ins->ctx->buffer, "}\n");
2642     shader_addline(ins->ctx->buffer, "void subroutine%u () {\n",  ins->src[0].reg.idx);
2643 }
2644
2645 static void shader_glsl_call(const struct wined3d_shader_instruction *ins)
2646 {
2647     shader_addline(ins->ctx->buffer, "subroutine%u();\n", ins->src[0].reg.idx);
2648 }
2649
2650 static void shader_glsl_callnz(const struct wined3d_shader_instruction *ins)
2651 {
2652     glsl_src_param_t src1_param;
2653
2654     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
2655     shader_addline(ins->ctx->buffer, "if (%s) subroutine%u();\n", src1_param.param_str, ins->src[0].reg.idx);
2656 }
2657
/* Handler for the ret instruction; intentionally emits nothing. */
static void shader_glsl_ret(const struct wined3d_shader_instruction *ins)
{
    /* Nothing to do: the closing } is written when a new function is started, and at the
     * end of the shader. This handler exists only to suppress the unhandled instruction
     * warning.
     */
}
2664
2665 /*********************************************
2666  * Pixel Shader Specific Code begins here
2667  ********************************************/
2668 static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
2669 {
2670     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
2671     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
2672     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
2673             ins->ctx->reg_maps->shader_version.minor);
2674     glsl_sample_function_t sample_function;
2675     DWORD sample_flags = 0;
2676     WINED3DSAMPLER_TEXTURE_TYPE sampler_type;
2677     DWORD sampler_idx;
2678     DWORD mask = 0, swizzle;
2679
2680     /* 1.0-1.4: Use destination register as sampler source.
2681      * 2.0+: Use provided sampler source. */
2682     if (shader_version < WINED3D_SHADER_VERSION(2,0)) sampler_idx = ins->dst[0].reg.idx;
2683     else sampler_idx = ins->src[1].reg.idx;
2684     sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
2685
2686     if (shader_version < WINED3D_SHADER_VERSION(1,4))
2687     {
2688         DWORD flags = deviceImpl->stateBlock->textureState[sampler_idx][WINED3DTSS_TEXTURETRANSFORMFLAGS];
2689
2690         /* Projected cube textures don't make a lot of sense, the resulting coordinates stay the same. */
2691         if (flags & WINED3DTTFF_PROJECTED && sampler_type != WINED3DSTT_CUBE) {
2692             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
2693             switch (flags & ~WINED3DTTFF_PROJECTED) {
2694                 case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
2695                 case WINED3DTTFF_COUNT2: mask = WINED3DSP_WRITEMASK_1; break;
2696                 case WINED3DTTFF_COUNT3: mask = WINED3DSP_WRITEMASK_2; break;
2697                 case WINED3DTTFF_COUNT4:
2698                 case WINED3DTTFF_DISABLE: mask = WINED3DSP_WRITEMASK_3; break;
2699             }
2700         }
2701     }
2702     else if (shader_version < WINED3D_SHADER_VERSION(2,0))
2703     {
2704         DWORD src_mod = ins->src[0].modifiers;
2705
2706         if (src_mod == WINED3DSPSM_DZ) {
2707             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
2708             mask = WINED3DSP_WRITEMASK_2;
2709         } else if (src_mod == WINED3DSPSM_DW) {
2710             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
2711             mask = WINED3DSP_WRITEMASK_3;
2712         }
2713     } else {
2714         if (ins->flags & WINED3DSI_TEXLD_PROJECT)
2715         {
2716             /* ps 2.0 texldp instruction always divides by the fourth component. */
2717             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
2718             mask = WINED3DSP_WRITEMASK_3;
2719         }
2720     }
2721
2722     if(deviceImpl->stateBlock->textures[sampler_idx] &&
2723        IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
2724         sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
2725     }
2726
2727     shader_glsl_get_sample_function(sampler_type, sample_flags, &sample_function);
2728     mask |= sample_function.coord_mask;
2729
2730     if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE;
2731     else swizzle = ins->src[1].swizzle;
2732
2733     /* 1.0-1.3: Use destination register as coordinate source.
2734        1.4+: Use provided coordinate source register. */
2735     if (shader_version < WINED3D_SHADER_VERSION(1,4))
2736     {
2737         char coord_mask[6];
2738         shader_glsl_write_mask_to_str(mask, coord_mask);
2739         shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL,
2740                 "T%u%s", sampler_idx, coord_mask);
2741     } else {
2742         glsl_src_param_t coord_param;
2743         shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param);
2744         if (ins->flags & WINED3DSI_TEXLD_BIAS)
2745         {
2746             glsl_src_param_t bias;
2747             shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias);
2748             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, bias.param_str,
2749                     "%s", coord_param.param_str);
2750         } else {
2751             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL,
2752                     "%s", coord_param.param_str);
2753         }
2754     }
2755 }
2756
2757 static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins)
2758 {
2759     IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
2760     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
2761     const struct wined3d_gl_info *gl_info = &deviceImpl->adapter->gl_info;
2762     glsl_sample_function_t sample_function;
2763     glsl_src_param_t coord_param, dx_param, dy_param;
2764     DWORD sample_flags = WINED3D_GLSL_SAMPLE_GRAD;
2765     DWORD sampler_type;
2766     DWORD sampler_idx;
2767     DWORD swizzle = ins->src[1].swizzle;
2768
2769     if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD])
2770     {
2771         FIXME("texldd used, but not supported by hardware. Falling back to regular tex\n");
2772         return shader_glsl_tex(ins);
2773     }
2774
2775     sampler_idx = ins->src[1].reg.idx;
2776     sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
2777     if(deviceImpl->stateBlock->textures[sampler_idx] &&
2778        IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
2779         sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
2780     }
2781
2782     shader_glsl_get_sample_function(sampler_type, sample_flags, &sample_function);
2783     shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
2784     shader_glsl_add_src_param(ins, &ins->src[2], sample_function.coord_mask, &dx_param);
2785     shader_glsl_add_src_param(ins, &ins->src[3], sample_function.coord_mask, &dy_param);
2786
2787     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, dx_param.param_str, dy_param.param_str, NULL,
2788                                 "%s", coord_param.param_str);
2789 }
2790
2791 static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins)
2792 {
2793     IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
2794     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
2795     const struct wined3d_gl_info *gl_info = &deviceImpl->adapter->gl_info;
2796     glsl_sample_function_t sample_function;
2797     glsl_src_param_t coord_param, lod_param;
2798     DWORD sample_flags = WINED3D_GLSL_SAMPLE_LOD;
2799     DWORD sampler_type;
2800     DWORD sampler_idx;
2801     DWORD swizzle = ins->src[1].swizzle;
2802
2803     sampler_idx = ins->src[1].reg.idx;
2804     sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
2805     if(deviceImpl->stateBlock->textures[sampler_idx] &&
2806        IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
2807         sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
2808     }
2809     shader_glsl_get_sample_function(sampler_type, sample_flags, &sample_function);
2810     shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
2811
2812     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param);
2813
2814     if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD]
2815             && shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
2816     {
2817         /* The GLSL spec claims the Lod sampling functions are only supported in vertex shaders.
2818          * However, they seem to work just fine in fragment shaders as well. */
2819         WARN("Using %s in fragment shader.\n", sample_function.name);
2820     }
2821     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str,
2822             "%s", coord_param.param_str);
2823 }
2824
2825 static void shader_glsl_texcoord(const struct wined3d_shader_instruction *ins)
2826 {
2827     /* FIXME: Make this work for more than just 2D textures */
2828     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2829     DWORD write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2830
2831     if (!(ins->ctx->reg_maps->shader_version.major == 1 && ins->ctx->reg_maps->shader_version.minor == 4))
2832     {
2833         char dst_mask[6];
2834
2835         shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2836         shader_addline(buffer, "clamp(gl_TexCoord[%u], 0.0, 1.0)%s);\n",
2837                 ins->dst[0].reg.idx, dst_mask);
2838     } else {
2839         DWORD reg = ins->src[0].reg.idx;
2840         DWORD src_mod = ins->src[0].modifiers;
2841         char dst_swizzle[6];
2842
2843         shader_glsl_get_swizzle(&ins->src[0], FALSE, write_mask, dst_swizzle);
2844
2845         if (src_mod == WINED3DSPSM_DZ) {
2846             glsl_src_param_t div_param;
2847             unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
2848             shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &div_param);
2849
2850             if (mask_size > 1) {
2851                 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
2852             } else {
2853                 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
2854             }
2855         } else if (src_mod == WINED3DSPSM_DW) {
2856             glsl_src_param_t div_param;
2857             unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
2858             shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &div_param);
2859
2860             if (mask_size > 1) {
2861                 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
2862             } else {
2863                 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
2864             }
2865         } else {
2866             shader_addline(buffer, "gl_TexCoord[%u]%s);\n", reg, dst_swizzle);
2867         }
2868     }
2869 }
2870
2871 /** Process the WINED3DSIO_TEXDP3TEX instruction in GLSL:
2872  * Take a 3-component dot product of the TexCoord[dstreg] and src,
2873  * then perform a 1D texture lookup from stage dstregnum, place into dst. */
2874 static void shader_glsl_texdp3tex(const struct wined3d_shader_instruction *ins)
2875 {
2876     glsl_src_param_t src0_param;
2877     glsl_sample_function_t sample_function;
2878     DWORD sampler_idx = ins->dst[0].reg.idx;
2879     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2880     WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
2881     UINT mask_size;
2882
2883     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
2884
2885     /* Do I have to take care about the projected bit? I don't think so, since the dp3 returns only one
2886      * scalar, and projected sampling would require 4.
2887      *
2888      * It is a dependent read - not valid with conditional NP2 textures
2889      */
2890     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
2891     mask_size = shader_glsl_get_write_mask_size(sample_function.coord_mask);
2892
2893     switch(mask_size)
2894     {
2895         case 1:
2896             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
2897                     "dot(gl_TexCoord[%u].xyz, %s)", sampler_idx, src0_param.param_str);
2898             break;
2899
2900         case 2:
2901             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
2902                     "vec2(dot(gl_TexCoord[%u].xyz, %s), 0.0)", sampler_idx, src0_param.param_str);
2903             break;
2904
2905         case 3:
2906             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
2907                     "vec3(dot(gl_TexCoord[%u].xyz, %s), 0.0, 0.0)", sampler_idx, src0_param.param_str);
2908             break;
2909
2910         default:
2911             FIXME("Unexpected mask size %u\n", mask_size);
2912             break;
2913     }
2914 }
2915
2916 /** Process the WINED3DSIO_TEXDP3 instruction in GLSL:
2917  * Take a 3-component dot product of the TexCoord[dstreg] and src. */
2918 static void shader_glsl_texdp3(const struct wined3d_shader_instruction *ins)
2919 {
2920     glsl_src_param_t src0_param;
2921     DWORD dstreg = ins->dst[0].reg.idx;
2922     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2923     DWORD dst_mask;
2924     unsigned int mask_size;
2925
2926     dst_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2927     mask_size = shader_glsl_get_write_mask_size(dst_mask);
2928     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
2929
2930     if (mask_size > 1) {
2931         shader_addline(ins->ctx->buffer, "vec%d(dot(T%u.xyz, %s)));\n", mask_size, dstreg, src0_param.param_str);
2932     } else {
2933         shader_addline(ins->ctx->buffer, "dot(T%u.xyz, %s));\n", dstreg, src0_param.param_str);
2934     }
2935 }
2936
2937 /** Process the WINED3DSIO_TEXDEPTH instruction in GLSL:
2938  * Calculate the depth as dst.x / dst.y   */
2939 static void shader_glsl_texdepth(const struct wined3d_shader_instruction *ins)
2940 {
2941     glsl_dst_param_t dst_param;
2942
2943     shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
2944
2945     /* Tests show that texdepth never returns anything below 0.0, and that r5.y is clamped to 1.0.
2946      * Negative input is accepted, -0.25 / -0.5 returns 0.5. GL should clamp gl_FragDepth to [0;1], but
2947      * this doesn't always work, so clamp the results manually. Whether or not the x value is clamped at 1
2948      * too is irrelevant, since if x = 0, any y value < 1.0 (and > 1.0 is not allowed) results in a result
2949      * >= 1.0 or < 0.0
2950      */
2951     shader_addline(ins->ctx->buffer, "gl_FragDepth = clamp((%s.x / min(%s.y, 1.0)), 0.0, 1.0);\n",
2952             dst_param.reg_name, dst_param.reg_name);
2953 }
2954
2955 /** Process the WINED3DSIO_TEXM3X2DEPTH instruction in GLSL:
2956  * Last row of a 3x2 matrix multiply, use the result to calculate the depth:
2957  * Calculate tmp0.y = TexCoord[dstreg] . src.xyz;  (tmp0.x has already been calculated)
2958  * depth = (tmp0.y == 0.0) ? 1.0 : tmp0.x / tmp0.y
2959  */
2960 static void shader_glsl_texm3x2depth(const struct wined3d_shader_instruction *ins)
2961 {
2962     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2963     DWORD dstreg = ins->dst[0].reg.idx;
2964     glsl_src_param_t src0_param;
2965
2966     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
2967
2968     shader_addline(ins->ctx->buffer, "tmp0.y = dot(T%u.xyz, %s);\n", dstreg, src0_param.param_str);
2969     shader_addline(ins->ctx->buffer, "gl_FragDepth = (tmp0.y == 0.0) ? 1.0 : clamp(tmp0.x / tmp0.y, 0.0, 1.0);\n");
2970 }
2971
2972 /** Process the WINED3DSIO_TEXM3X2PAD instruction in GLSL
2973  * Calculate the 1st of a 2-row matrix multiplication. */
2974 static void shader_glsl_texm3x2pad(const struct wined3d_shader_instruction *ins)
2975 {
2976     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2977     DWORD reg = ins->dst[0].reg.idx;
2978     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2979     glsl_src_param_t src0_param;
2980
2981     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
2982     shader_addline(buffer, "tmp0.x = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
2983 }
2984
2985 /** Process the WINED3DSIO_TEXM3X3PAD instruction in GLSL
2986  * Calculate the 1st or 2nd row of a 3-row matrix multiplication. */
2987 static void shader_glsl_texm3x3pad(const struct wined3d_shader_instruction *ins)
2988 {
2989     IWineD3DPixelShaderImpl *shader = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
2990     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2991     DWORD reg = ins->dst[0].reg.idx;
2992     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2993     SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
2994     glsl_src_param_t src0_param;
2995
2996     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
2997     shader_addline(buffer, "tmp0.%c = dot(T%u.xyz, %s);\n", 'x' + current_state->current_row, reg, src0_param.param_str);
2998     current_state->texcoord_w[current_state->current_row++] = reg;
2999 }
3000
3001 static void shader_glsl_texm3x2tex(const struct wined3d_shader_instruction *ins)
3002 {
3003     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3004     DWORD reg = ins->dst[0].reg.idx;
3005     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3006     glsl_src_param_t src0_param;
3007     WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[reg];
3008     glsl_sample_function_t sample_function;
3009
3010     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3011     shader_addline(buffer, "tmp0.y = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3012
3013     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
3014
3015     /* Sample the texture using the calculated coordinates */
3016     shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xy");
3017 }
3018
3019 /** Process the WINED3DSIO_TEXM3X3TEX instruction in GLSL
3020  * Perform the 3rd row of a 3x3 matrix multiply, then sample the texture using the calculated coordinates */
3021 static void shader_glsl_texm3x3tex(const struct wined3d_shader_instruction *ins)
3022 {
3023     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3024     glsl_src_param_t src0_param;
3025     DWORD reg = ins->dst[0].reg.idx;
3026     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
3027     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
3028     WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[reg];
3029     glsl_sample_function_t sample_function;
3030
3031     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3032     shader_addline(ins->ctx->buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3033
3034     /* Dependent read, not valid with conditional NP2 */
3035     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
3036
3037     /* Sample the texture using the calculated coordinates */
3038     shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3039
3040     current_state->current_row = 0;
3041 }
3042
3043 /** Process the WINED3DSIO_TEXM3X3 instruction in GLSL
3044  * Perform the 3rd row of a 3x3 matrix multiply */
3045 static void shader_glsl_texm3x3(const struct wined3d_shader_instruction *ins)
3046 {
3047     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3048     glsl_src_param_t src0_param;
3049     char dst_mask[6];
3050     DWORD reg = ins->dst[0].reg.idx;
3051     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
3052     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
3053
3054     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3055
3056     shader_glsl_append_dst(ins->ctx->buffer, ins);
3057     shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3058     shader_addline(ins->ctx->buffer, "vec4(tmp0.xy, dot(T%u.xyz, %s), 1.0)%s);\n", reg, src0_param.param_str, dst_mask);
3059
3060     current_state->current_row = 0;
3061 }
3062
3063 /* Process the WINED3DSIO_TEXM3X3SPEC instruction in GLSL
3064  * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
3065 static void shader_glsl_texm3x3spec(const struct wined3d_shader_instruction *ins)
3066 {
3067     IWineD3DPixelShaderImpl *shader = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
3068     DWORD reg = ins->dst[0].reg.idx;
3069     glsl_src_param_t src0_param;
3070     glsl_src_param_t src1_param;
3071     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3072     SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
3073     WINED3DSAMPLER_TEXTURE_TYPE stype = ins->ctx->reg_maps->sampler_type[reg];
3074     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3075     glsl_sample_function_t sample_function;
3076
3077     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3078     shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
3079
3080     /* Perform the last matrix multiply operation */
3081     shader_addline(buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3082     /* Reflection calculation */
3083     shader_addline(buffer, "tmp0.xyz = -reflect((%s), normalize(tmp0.xyz));\n", src1_param.param_str);
3084
3085     /* Dependent read, not valid with conditional NP2 */
3086     shader_glsl_get_sample_function(stype, 0, &sample_function);
3087
3088     /* Sample the texture */
3089     shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3090
3091     current_state->current_row = 0;
3092 }
3093
3094 /* Process the WINED3DSIO_TEXM3X3VSPEC instruction in GLSL
3095  * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
3096 static void shader_glsl_texm3x3vspec(const struct wined3d_shader_instruction *ins)
3097 {
3098     IWineD3DPixelShaderImpl *shader = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
3099     DWORD reg = ins->dst[0].reg.idx;
3100     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3101     SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
3102     glsl_src_param_t src0_param;
3103     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3104     WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[reg];
3105     glsl_sample_function_t sample_function;
3106
3107     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3108
3109     /* Perform the last matrix multiply operation */
3110     shader_addline(buffer, "tmp0.z = dot(vec3(T%u), vec3(%s));\n", reg, src0_param.param_str);
3111
3112     /* Construct the eye-ray vector from w coordinates */
3113     shader_addline(buffer, "tmp1.xyz = normalize(vec3(gl_TexCoord[%u].w, gl_TexCoord[%u].w, gl_TexCoord[%u].w));\n",
3114             current_state->texcoord_w[0], current_state->texcoord_w[1], reg);
3115     shader_addline(buffer, "tmp0.xyz = -reflect(tmp1.xyz, normalize(tmp0.xyz));\n");
3116
3117     /* Dependent read, not valid with conditional NP2 */
3118     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
3119
3120     /* Sample the texture using the calculated coordinates */
3121     shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3122
3123     current_state->current_row = 0;
3124 }
3125
3126 /** Process the WINED3DSIO_TEXBEM instruction in GLSL.
3127  * Apply a fake bump map transform.
3128  * texbem is pshader <= 1.3 only, this saves a few version checks
3129  */
3130 static void shader_glsl_texbem(const struct wined3d_shader_instruction *ins)
3131 {
3132     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
3133     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
3134     glsl_sample_function_t sample_function;
3135     glsl_src_param_t coord_param;
3136     WINED3DSAMPLER_TEXTURE_TYPE sampler_type;
3137     DWORD sampler_idx;
3138     DWORD mask;
3139     DWORD flags;
3140     char coord_mask[6];
3141
3142     sampler_idx = ins->dst[0].reg.idx;
3143     flags = deviceImpl->stateBlock->textureState[sampler_idx][WINED3DTSS_TEXTURETRANSFORMFLAGS];
3144
3145     sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3146     /* Dependent read, not valid with conditional NP2 */
3147     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
3148     mask = sample_function.coord_mask;
3149
3150     shader_glsl_write_mask_to_str(mask, coord_mask);
3151
3152     /* with projective textures, texbem only divides the static texture coord, not the displacement,
3153          * so we can't let the GL handle this.
3154          */
3155     if (flags & WINED3DTTFF_PROJECTED) {
3156         DWORD div_mask=0;
3157         char coord_div_mask[3];
3158         switch (flags & ~WINED3DTTFF_PROJECTED) {
3159             case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
3160             case WINED3DTTFF_COUNT2: div_mask = WINED3DSP_WRITEMASK_1; break;
3161             case WINED3DTTFF_COUNT3: div_mask = WINED3DSP_WRITEMASK_2; break;
3162             case WINED3DTTFF_COUNT4:
3163             case WINED3DTTFF_DISABLE: div_mask = WINED3DSP_WRITEMASK_3; break;
3164         }
3165         shader_glsl_write_mask_to_str(div_mask, coord_div_mask);
3166         shader_addline(ins->ctx->buffer, "T%u%s /= T%u%s;\n", sampler_idx, coord_mask, sampler_idx, coord_div_mask);
3167     }
3168
3169     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &coord_param);
3170
3171     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3172             "T%u%s + vec4(bumpenvmat%d * %s, 0.0, 0.0)%s", sampler_idx, coord_mask, sampler_idx,
3173             coord_param.param_str, coord_mask);
3174
3175     if (ins->handler_idx == WINED3DSIH_TEXBEML)
3176     {
3177         glsl_src_param_t luminance_param;
3178         glsl_dst_param_t dst_param;
3179
3180         shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &luminance_param);
3181         shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3182
3183         shader_addline(ins->ctx->buffer, "%s%s *= (%s * luminancescale%d + luminanceoffset%d);\n",
3184                 dst_param.reg_name, dst_param.mask_str,
3185                 luminance_param.param_str, sampler_idx, sampler_idx);
3186     }
3187 }
3188
3189 static void shader_glsl_bem(const struct wined3d_shader_instruction *ins)
3190 {
3191     glsl_src_param_t src0_param, src1_param;
3192     DWORD sampler_idx = ins->dst[0].reg.idx;
3193
3194     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
3195     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
3196
3197     shader_glsl_append_dst(ins->ctx->buffer, ins);
3198     shader_addline(ins->ctx->buffer, "%s + bumpenvmat%d * %s);\n",
3199             src0_param.param_str, sampler_idx, src1_param.param_str);
3200 }
3201
3202 /** Process the WINED3DSIO_TEXREG2AR instruction in GLSL
3203  * Sample 2D texture at dst using the alpha & red (wx) components of src as texture coordinates */
3204 static void shader_glsl_texreg2ar(const struct wined3d_shader_instruction *ins)
3205 {
3206     glsl_src_param_t src0_param;
3207     DWORD sampler_idx = ins->dst[0].reg.idx;
3208     WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3209     glsl_sample_function_t sample_function;
3210
3211     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
3212
3213     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
3214     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3215             "%s.wx", src0_param.reg_name);
3216 }
3217
3218 /** Process the WINED3DSIO_TEXREG2GB instruction in GLSL
3219  * Sample 2D texture at dst using the green & blue (yz) components of src as texture coordinates */
3220 static void shader_glsl_texreg2gb(const struct wined3d_shader_instruction *ins)
3221 {
3222     glsl_src_param_t src0_param;
3223     DWORD sampler_idx = ins->dst[0].reg.idx;
3224     WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3225     glsl_sample_function_t sample_function;
3226
3227     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
3228
3229     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
3230     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3231             "%s.yz", src0_param.reg_name);
3232 }
3233
3234 /** Process the WINED3DSIO_TEXREG2RGB instruction in GLSL
3235  * Sample texture at dst using the rgb (xyz) components of src as texture coordinates */
3236 static void shader_glsl_texreg2rgb(const struct wined3d_shader_instruction *ins)
3237 {
3238     glsl_src_param_t src0_param;
3239     DWORD sampler_idx = ins->dst[0].reg.idx;
3240     WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3241     glsl_sample_function_t sample_function;
3242
3243     /* Dependent read, not valid with conditional NP2 */
3244     shader_glsl_get_sample_function(sampler_type, 0, &sample_function);
3245     shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &src0_param);
3246
3247     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3248             "%s", src0_param.param_str);
3249 }
3250
3251 /** Process the WINED3DSIO_TEXKILL instruction in GLSL.
3252  * If any of the first 3 components are < 0, discard this pixel */
3253 static void shader_glsl_texkill(const struct wined3d_shader_instruction *ins)
3254 {
3255     glsl_dst_param_t dst_param;
3256
3257     /* The argument is a destination parameter, and no writemasks are allowed */
3258     shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3259     if (ins->ctx->reg_maps->shader_version.major >= 2)
3260     {
3261         /* 2.0 shaders compare all 4 components in texkill */
3262         shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyzw, vec4(0.0)))) discard;\n", dst_param.reg_name);
3263     } else {
3264         /* 1.X shaders only compare the first 3 components, probably due to the nature of the texkill
3265          * instruction as a tex* instruction, and phase, which kills all a / w components. Even if all
3266          * 4 components are defined, only the first 3 are used
3267          */
3268         shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;\n", dst_param.reg_name);
3269     }
3270 }
3271
3272 /** Process the WINED3DSIO_DP2ADD instruction in GLSL.
3273  * dst = dot2(src0, src1) + src2 */
3274 static void shader_glsl_dp2add(const struct wined3d_shader_instruction *ins)
3275 {
3276     glsl_src_param_t src0_param;
3277     glsl_src_param_t src1_param;
3278     glsl_src_param_t src2_param;
3279     DWORD write_mask;
3280     unsigned int mask_size;
3281
3282     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3283     mask_size = shader_glsl_get_write_mask_size(write_mask);
3284
3285     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
3286     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
3287     shader_glsl_add_src_param(ins, &ins->src[2], WINED3DSP_WRITEMASK_0, &src2_param);
3288
3289     if (mask_size > 1) {
3290         shader_addline(ins->ctx->buffer, "vec%d(dot(%s, %s) + %s));\n",
3291                 mask_size, src0_param.param_str, src1_param.param_str, src2_param.param_str);
3292     } else {
3293         shader_addline(ins->ctx->buffer, "dot(%s, %s) + %s);\n",
3294                 src0_param.param_str, src1_param.param_str, src2_param.param_str);
3295     }
3296 }
3297
3298 static void shader_glsl_input_pack(IWineD3DPixelShader *iface, struct wined3d_shader_buffer *buffer,
3299         const struct wined3d_shader_signature_element *input_signature, const struct shader_reg_maps *reg_maps,
3300         enum vertexprocessing_mode vertexprocessing)
3301 {
3302     unsigned int i;
3303     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
3304     WORD map = reg_maps->input_registers;
3305
3306     for (i = 0; map; map >>= 1, ++i)
3307     {
3308         const char *semantic_name;
3309         UINT semantic_idx;
3310         char reg_mask[6];
3311
3312         /* Unused */
3313         if (!(map & 1)) continue;
3314
3315         semantic_name = input_signature[i].semantic_name;
3316         semantic_idx = input_signature[i].semantic_idx;
3317         shader_glsl_write_mask_to_str(input_signature[i].mask, reg_mask);
3318
3319         if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
3320         {
3321             if (semantic_idx < 8 && vertexprocessing == pretransformed)
3322                 shader_addline(buffer, "IN[%u]%s = gl_TexCoord[%u]%s;\n",
3323                         This->input_reg_map[i], reg_mask, semantic_idx, reg_mask);
3324             else
3325                 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3326                         This->input_reg_map[i], reg_mask, reg_mask);
3327         }
3328         else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
3329         {
3330             if (semantic_idx == 0)
3331                 shader_addline(buffer, "IN[%u]%s = vec4(gl_Color)%s;\n",
3332                         This->input_reg_map[i], reg_mask, reg_mask);
3333             else if (semantic_idx == 1)
3334                 shader_addline(buffer, "IN[%u]%s = vec4(gl_SecondaryColor)%s;\n",
3335                         This->input_reg_map[i], reg_mask, reg_mask);
3336             else
3337                 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3338                         This->input_reg_map[i], reg_mask, reg_mask);
3339         }
3340         else
3341         {
3342             shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3343                     This->input_reg_map[i], reg_mask, reg_mask);
3344         }
3345     }
3346 }
3347
3348 /*********************************************
3349  * Vertex Shader Specific Code begins here
3350  ********************************************/
3351
3352 static void add_glsl_program_entry(struct shader_glsl_priv *priv, struct glsl_shader_prog_link *entry) {
3353     glsl_program_key_t key;
3354
3355     key.vshader = entry->vshader;
3356     key.pshader = entry->pshader;
3357     key.vs_args = entry->vs_args;
3358     key.ps_args = entry->ps_args;
3359
3360     if (wine_rb_put(&priv->program_lookup, &key, &entry->program_lookup_entry) == -1)
3361     {
3362         ERR("Failed to insert program entry.\n");
3363     }
3364 }
3365
3366 static struct glsl_shader_prog_link *get_glsl_program_entry(struct shader_glsl_priv *priv,
3367         IWineD3DVertexShader *vshader, IWineD3DPixelShader *pshader, struct vs_compile_args *vs_args,
3368         struct ps_compile_args *ps_args) {
3369     struct wine_rb_entry *entry;
3370     glsl_program_key_t key;
3371
3372     key.vshader = vshader;
3373     key.pshader = pshader;
3374     key.vs_args = *vs_args;
3375     key.ps_args = *ps_args;
3376
3377     entry = wine_rb_get(&priv->program_lookup, &key);
3378     return entry ? WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry) : NULL;
3379 }
3380
3381 /* GL locking is done by the caller */
3382 static void delete_glsl_program_entry(struct shader_glsl_priv *priv, const struct wined3d_gl_info *gl_info,
3383         struct glsl_shader_prog_link *entry)
3384 {
3385     glsl_program_key_t key;
3386
3387     key.vshader = entry->vshader;
3388     key.pshader = entry->pshader;
3389     key.vs_args = entry->vs_args;
3390     key.ps_args = entry->ps_args;
3391     wine_rb_remove(&priv->program_lookup, &key);
3392
3393     GL_EXTCALL(glDeleteObjectARB(entry->programId));
3394     if (entry->vshader) list_remove(&entry->vshader_entry);
3395     if (entry->pshader) list_remove(&entry->pshader_entry);
3396     HeapFree(GetProcessHeap(), 0, entry->vuniformF_locations);
3397     HeapFree(GetProcessHeap(), 0, entry->puniformF_locations);
3398     HeapFree(GetProcessHeap(), 0, entry);
3399 }
3400
3401 static void handle_ps3_input(struct wined3d_shader_buffer *buffer, const struct wined3d_gl_info *gl_info, const DWORD *map,
3402         const struct wined3d_shader_signature_element *input_signature, const struct shader_reg_maps *reg_maps_in,
3403         const struct wined3d_shader_signature_element *output_signature, const struct shader_reg_maps *reg_maps_out)
3404 {
3405     unsigned int i, j;
3406     const char *semantic_name_in, *semantic_name_out;
3407     UINT semantic_idx_in, semantic_idx_out;
3408     DWORD *set;
3409     DWORD in_idx;
3410     unsigned int in_count = vec4_varyings(3, gl_info);
3411     char reg_mask[6], reg_mask_out[6];
3412     char destination[50];
3413     WORD input_map, output_map;
3414
3415     set = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*set) * (in_count + 2));
3416
3417     if (!output_signature)
3418     {
3419         /* Save gl_FrontColor & gl_FrontSecondaryColor before overwriting them. */
3420         shader_addline(buffer, "vec4 front_color = gl_FrontColor;\n");
3421         shader_addline(buffer, "vec4 front_secondary_color = gl_FrontSecondaryColor;\n");
3422     }
3423
3424     input_map = reg_maps_in->input_registers;
3425     for (i = 0; input_map; input_map >>= 1, ++i)
3426     {
3427         if (!(input_map & 1)) continue;
3428
3429         in_idx = map[i];
3430         if (in_idx >= (in_count + 2)) {
3431             FIXME("More input varyings declared than supported, expect issues\n");
3432             continue;
3433         }
3434         else if (map[i] == ~0U)
3435         {
3436             /* Declared, but not read register */
3437             continue;
3438         }
3439
3440         if (in_idx == in_count) {
3441             sprintf(destination, "gl_FrontColor");
3442         } else if (in_idx == in_count + 1) {
3443             sprintf(destination, "gl_FrontSecondaryColor");
3444         } else {
3445             sprintf(destination, "IN[%u]", in_idx);
3446         }
3447
3448         semantic_name_in = input_signature[i].semantic_name;
3449         semantic_idx_in = input_signature[i].semantic_idx;
3450         set[map[i]] = input_signature[i].mask;
3451         shader_glsl_write_mask_to_str(input_signature[i].mask, reg_mask);
3452
3453         if (!output_signature)
3454         {
3455             if (shader_match_semantic(semantic_name_in, WINED3DDECLUSAGE_COLOR))
3456             {
3457                 if (semantic_idx_in == 0)
3458                     shader_addline(buffer, "%s%s = front_color%s;\n",
3459                             destination, reg_mask, reg_mask);
3460                 else if (semantic_idx_in == 1)
3461                     shader_addline(buffer, "%s%s = front_secondary_color%s;\n",
3462                             destination, reg_mask, reg_mask);
3463                 else
3464                     shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3465                             destination, reg_mask, reg_mask);
3466             }
3467             else if (shader_match_semantic(semantic_name_in, WINED3DDECLUSAGE_TEXCOORD))
3468             {
3469                 if (semantic_idx_in < 8)
3470                 {
3471                     shader_addline(buffer, "%s%s = gl_TexCoord[%u]%s;\n",
3472                             destination, reg_mask, semantic_idx_in, reg_mask);
3473                 }
3474                 else
3475                 {
3476                     shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3477                             destination, reg_mask, reg_mask);
3478                 }
3479             }
3480             else if (shader_match_semantic(semantic_name_in, WINED3DDECLUSAGE_FOG))
3481             {
3482                 shader_addline(buffer, "%s%s = vec4(gl_FogFragCoord, 0.0, 0.0, 0.0)%s;\n",
3483                         destination, reg_mask, reg_mask);
3484             }
3485             else
3486             {
3487                 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3488                         destination, reg_mask, reg_mask);
3489             }
3490         } else {
3491             BOOL found = FALSE;
3492
3493             output_map = reg_maps_out->output_registers;
3494             for (j = 0; output_map; output_map >>= 1, ++j)
3495             {
3496                 if (!(output_map & 1)) continue;
3497
3498                 semantic_name_out = output_signature[j].semantic_name;
3499                 semantic_idx_out = output_signature[j].semantic_idx;
3500                 shader_glsl_write_mask_to_str(output_signature[j].mask, reg_mask_out);
3501
3502                 if (semantic_idx_in == semantic_idx_out
3503                         && !strcmp(semantic_name_in, semantic_name_out))
3504                 {
3505                     shader_addline(buffer, "%s%s = OUT[%u]%s;\n",
3506                             destination, reg_mask, j, reg_mask);
3507                     found = TRUE;
3508                 }
3509             }
3510             if(!found) {
3511                 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3512                                destination, reg_mask, reg_mask);
3513             }
3514         }
3515     }
3516
3517     /* This is solely to make the compiler / linker happy and avoid warning about undefined
3518      * varyings. It shouldn't result in any real code executed on the GPU, since all read
3519      * input varyings are assigned above, if the optimizer works properly.
3520      */
3521     for(i = 0; i < in_count + 2; i++) {
3522         if (set[i] && set[i] != WINED3DSP_WRITEMASK_ALL)
3523         {
3524             unsigned int size = 0;
3525             memset(reg_mask, 0, sizeof(reg_mask));
3526             if(!(set[i] & WINED3DSP_WRITEMASK_0)) {
3527                 reg_mask[size] = 'x';
3528                 size++;
3529             }
3530             if(!(set[i] & WINED3DSP_WRITEMASK_1)) {
3531                 reg_mask[size] = 'y';
3532                 size++;
3533             }
3534             if(!(set[i] & WINED3DSP_WRITEMASK_2)) {
3535                 reg_mask[size] = 'z';
3536                 size++;
3537             }
3538             if(!(set[i] & WINED3DSP_WRITEMASK_3)) {
3539                 reg_mask[size] = 'w';
3540                 size++;
3541             }
3542
3543             if (i == in_count) {
3544                 sprintf(destination, "gl_FrontColor");
3545             } else if (i == in_count + 1) {
3546                 sprintf(destination, "gl_FrontSecondaryColor");
3547             } else {
3548                 sprintf(destination, "IN[%u]", i);
3549             }
3550
3551             if (size == 1) {
3552                 shader_addline(buffer, "%s.%s = 0.0;\n", destination, reg_mask);
3553             } else {
3554                 shader_addline(buffer, "%s.%s = vec%u(0.0);\n", destination, reg_mask, size);
3555             }
3556         }
3557     }
3558
3559     HeapFree(GetProcessHeap(), 0, set);
3560 }
3561
3562 /* GL locking is done by the caller */
3563 static GLhandleARB generate_param_reorder_function(struct wined3d_shader_buffer *buffer,
3564         IWineD3DVertexShader *vertexshader, IWineD3DPixelShader *pixelshader, const struct wined3d_gl_info *gl_info)
3565 {
3566     GLhandleARB ret = 0;
3567     IWineD3DVertexShaderImpl *vs = (IWineD3DVertexShaderImpl *) vertexshader;
3568     IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) pixelshader;
3569     IWineD3DDeviceImpl *device;
3570     DWORD vs_major = vs->baseShader.reg_maps.shader_version.major;
3571     DWORD ps_major = ps ? ps->baseShader.reg_maps.shader_version.major : 0;
3572     unsigned int i;
3573     const char *semantic_name;
3574     UINT semantic_idx;
3575     char reg_mask[6];
3576     const struct wined3d_shader_signature_element *output_signature;
3577
3578     shader_buffer_clear(buffer);
3579
3580     shader_addline(buffer, "#version 120\n");
3581
3582     if(vs_major < 3 && ps_major < 3) {
3583         /* That one is easy: The vertex shader writes to the builtin varyings, the pixel shader reads from them.
3584          * Take care about the texcoord .w fixup though if we're using the fixed function fragment pipeline
3585          */
3586         device = (IWineD3DDeviceImpl *) vs->baseShader.device;
3587         if ((gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W)
3588                 && ps_major == 0 && vs_major > 0 && !device->frag_pipe->ffp_proj_control)
3589         {
3590             shader_addline(buffer, "void order_ps_input() {\n");
3591             for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) {
3592                 if(vs->baseShader.reg_maps.texcoord_mask[i] != 0 &&
3593                    vs->baseShader.reg_maps.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) {
3594                     shader_addline(buffer, "gl_TexCoord[%u].w = 1.0;\n", i);
3595                 }
3596             }
3597             shader_addline(buffer, "}\n");
3598         } else {
3599             shader_addline(buffer, "void order_ps_input() { /* do nothing */ }\n");
3600         }
3601     } else if(ps_major < 3 && vs_major >= 3) {
3602         WORD map = vs->baseShader.reg_maps.output_registers;
3603
3604         /* The vertex shader writes to its own varyings, the pixel shader needs them in the builtin ones */
3605         output_signature = vs->output_signature;
3606
3607         shader_addline(buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
3608         for (i = 0; map; map >>= 1, ++i)
3609         {
3610             DWORD write_mask;
3611
3612             if (!(map & 1)) continue;
3613
3614             semantic_name = output_signature[i].semantic_name;
3615             semantic_idx = output_signature[i].semantic_idx;
3616             write_mask = output_signature[i].mask;
3617             shader_glsl_write_mask_to_str(write_mask, reg_mask);
3618
3619             if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
3620             {
3621                 if (semantic_idx == 0)
3622                     shader_addline(buffer, "gl_FrontColor%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
3623                 else if (semantic_idx == 1)
3624                     shader_addline(buffer, "gl_FrontSecondaryColor%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
3625             }
3626             else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION))
3627             {
3628                 shader_addline(buffer, "gl_Position%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
3629             }
3630             else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
3631             {
3632                 if (semantic_idx < 8)
3633                 {
3634                     if (!(gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) || ps_major > 0)
3635                         write_mask |= WINED3DSP_WRITEMASK_3;
3636
3637                     shader_addline(buffer, "gl_TexCoord[%u]%s = OUT[%u]%s;\n",
3638                             semantic_idx, reg_mask, i, reg_mask);
3639                     if (!(write_mask & WINED3DSP_WRITEMASK_3))
3640                         shader_addline(buffer, "gl_TexCoord[%u].w = 1.0;\n", semantic_idx);
3641                 }
3642             }
3643             else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE))
3644             {
3645                 shader_addline(buffer, "gl_PointSize = OUT[%u].x;\n", i);
3646             }
3647             else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG))
3648             {
3649                 shader_addline(buffer, "gl_FogFragCoord = OUT[%u].%c;\n", i, reg_mask[1]);
3650             }
3651         }
3652         shader_addline(buffer, "}\n");
3653
3654     } else if(ps_major >= 3 && vs_major >= 3) {
3655         WORD map = vs->baseShader.reg_maps.output_registers;
3656
3657         output_signature = vs->output_signature;
3658
3659         /* This one is tricky: a 3.0 pixel shader reads from a 3.0 vertex shader */
3660         shader_addline(buffer, "varying vec4 IN[%u];\n", vec4_varyings(3, gl_info));
3661         shader_addline(buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
3662
3663         /* First, sort out position and point size. Those are not passed to the pixel shader */
3664         for (i = 0; map; map >>= 1, ++i)
3665         {
3666             if (!(map & 1)) continue;
3667
3668             semantic_name = output_signature[i].semantic_name;
3669             shader_glsl_write_mask_to_str(output_signature[i].mask, reg_mask);
3670
3671             if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION))
3672             {
3673                 shader_addline(buffer, "gl_Position%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
3674             }
3675             else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE))
3676             {
3677                 shader_addline(buffer, "gl_PointSize = OUT[%u].x;\n", i);
3678             }
3679         }
3680
3681         /* Then, fix the pixel shader input */
3682         handle_ps3_input(buffer, gl_info, ps->input_reg_map, ps->input_signature,
3683                 &ps->baseShader.reg_maps, output_signature, &vs->baseShader.reg_maps);
3684
3685         shader_addline(buffer, "}\n");
3686     } else if(ps_major >= 3 && vs_major < 3) {
3687         shader_addline(buffer, "varying vec4 IN[%u];\n", vec4_varyings(3, gl_info));
3688         shader_addline(buffer, "void order_ps_input() {\n");
3689         /* The vertex shader wrote to the builtin varyings. There is no need to figure out position and
3690          * point size, but we depend on the optimizers kindness to find out that the pixel shader doesn't
3691          * read gl_TexCoord and gl_ColorX, otherwise we'll run out of varyings
3692          */
3693         handle_ps3_input(buffer, gl_info, ps->input_reg_map, ps->input_signature,
3694                 &ps->baseShader.reg_maps, NULL, NULL);
3695         shader_addline(buffer, "}\n");
3696     } else {
3697         ERR("Unexpected vertex and pixel shader version condition: vs: %d, ps: %d\n", vs_major, ps_major);
3698     }
3699
3700     ret = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
3701     checkGLcall("glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)");
3702     GL_EXTCALL(glShaderSourceARB(ret, 1, (const char**)&buffer->buffer, NULL));
3703     checkGLcall("glShaderSourceARB(ret, 1, &buffer->buffer, NULL)");
3704     GL_EXTCALL(glCompileShaderARB(ret));
3705     checkGLcall("glCompileShaderARB(ret)");
3706
3707     return ret;
3708 }
3709
3710 /* GL locking is done by the caller */
3711 static void hardcode_local_constants(IWineD3DBaseShaderImpl *shader, const struct wined3d_gl_info *gl_info,
3712         GLhandleARB programId, char prefix)
3713 {
3714     const local_constant *lconst;
3715     GLint tmp_loc;
3716     const float *value;
3717     char glsl_name[8];
3718
3719     LIST_FOR_EACH_ENTRY(lconst, &shader->baseShader.constantsF, local_constant, entry) {
3720         value = (const float *)lconst->value;
3721         snprintf(glsl_name, sizeof(glsl_name), "%cLC%u", prefix, lconst->idx);
3722         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
3723         GL_EXTCALL(glUniform4fvARB(tmp_loc, 1, value));
3724     }
3725     checkGLcall("Hardcoding local constants");
3726 }
3727
3728 /* GL locking is done by the caller */
3729 static GLuint shader_glsl_generate_pshader(const struct wined3d_context *context,
3730         struct wined3d_shader_buffer *buffer, IWineD3DPixelShaderImpl *This,
3731         const struct ps_compile_args *args, struct ps_np2fixup_info *np2fixup_info)
3732 {
3733     const struct shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
3734     const struct wined3d_gl_info *gl_info = context->gl_info;
3735     CONST DWORD *function = This->baseShader.function;
3736     struct shader_glsl_ctx_priv priv_ctx;
3737
3738     /* Create the hw GLSL shader object and assign it as the shader->prgId */
3739     GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
3740
3741     memset(&priv_ctx, 0, sizeof(priv_ctx));
3742     priv_ctx.cur_ps_args = args;
3743     priv_ctx.cur_np2fixup_info = np2fixup_info;
3744
3745     shader_addline(buffer, "#version 120\n");
3746
3747     if (gl_info->supported[ARB_SHADER_TEXTURE_LOD] && reg_maps->usestexldd)
3748     {
3749         shader_addline(buffer, "#extension GL_ARB_shader_texture_lod : enable\n");
3750     }
3751     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
3752     {
3753         /* The spec says that it doesn't have to be explicitly enabled, but the nvidia
3754          * drivers write a warning if we don't do so
3755          */
3756         shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n");
3757     }
3758
3759     /* Base Declarations */
3760     shader_generate_glsl_declarations(context, buffer, (IWineD3DBaseShader *)This, reg_maps, &priv_ctx);
3761
3762     /* Pack 3.0 inputs */
3763     if (reg_maps->shader_version.major >= 3 && args->vp_mode != vertexshader)
3764     {
3765         shader_glsl_input_pack((IWineD3DPixelShader *) This, buffer, This->input_signature, reg_maps, args->vp_mode);
3766     }
3767
3768     /* Base Shader Body */
3769     shader_generate_main((IWineD3DBaseShader *)This, buffer, reg_maps, function, &priv_ctx);
3770
3771     /* Pixel shaders < 2.0 place the resulting color in R0 implicitly */
3772     if (reg_maps->shader_version.major < 2)
3773     {
3774         /* Some older cards like GeforceFX ones don't support multiple buffers, so also not gl_FragData */
3775         shader_addline(buffer, "gl_FragData[0] = R0;\n");
3776     }
3777
3778     if (args->srgb_correction)
3779     {
3780         shader_addline(buffer, "tmp0.xyz = pow(gl_FragData[0].xyz, vec3(srgb_const0.x));\n");
3781         shader_addline(buffer, "tmp0.xyz = tmp0.xyz * vec3(srgb_const0.y) - vec3(srgb_const0.z);\n");
3782         shader_addline(buffer, "tmp1.xyz = gl_FragData[0].xyz * vec3(srgb_const0.w);\n");
3783         shader_addline(buffer, "bvec3 srgb_compare = lessThan(gl_FragData[0].xyz, vec3(srgb_const1.x));\n");
3784         shader_addline(buffer, "gl_FragData[0].xyz = mix(tmp0.xyz, tmp1.xyz, vec3(srgb_compare));\n");
3785         shader_addline(buffer, "gl_FragData[0] = clamp(gl_FragData[0], 0.0, 1.0);\n");
3786     }
3787     /* Pixel shader < 3.0 do not replace the fog stage.
3788      * This implements linear fog computation and blending.
3789      * TODO: non linear fog
3790      * NOTE: gl_Fog.start and gl_Fog.end don't hold fog start s and end e but
3791      * -1/(e-s) and e/(e-s) respectively.
3792      */
3793     if (reg_maps->shader_version.major < 3)
3794     {
3795         switch(args->fog) {
3796             case FOG_OFF: break;
3797             case FOG_LINEAR:
3798                 shader_addline(buffer, "float fogstart = -1.0 / (gl_Fog.end - gl_Fog.start);\n");
3799                 shader_addline(buffer, "float fogend = gl_Fog.end * -fogstart;\n");
3800                 shader_addline(buffer, "float Fog = clamp(gl_FogFragCoord * fogstart + fogend, 0.0, 1.0);\n");
3801                 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
3802                 break;
3803             case FOG_EXP:
3804                 /* Fog = e^(-gl_Fog.density * gl_FogFragCoord) */
3805                 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_FogFragCoord);\n");
3806                 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n");
3807                 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
3808                 break;
3809             case FOG_EXP2:
3810                 /* Fog = e^(-(gl_Fog.density * gl_FogFragCoord)^2) */
3811                 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_Fog.density * gl_FogFragCoord * gl_FogFragCoord);\n");
3812                 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n");
3813                 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
3814                 break;
3815         }
3816     }
3817
3818     shader_addline(buffer, "}\n");
3819
3820     TRACE("Compiling shader object %u\n", shader_obj);
3821     GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer->buffer, NULL));
3822     GL_EXTCALL(glCompileShaderARB(shader_obj));
3823     print_glsl_info_log(gl_info, shader_obj);
3824
3825     /* Store the shader object */
3826     return shader_obj;
3827 }
3828
3829 /* GL locking is done by the caller */
3830 static GLuint shader_glsl_generate_vshader(const struct wined3d_context *context,
3831         struct wined3d_shader_buffer *buffer, IWineD3DVertexShaderImpl *This,
3832         const struct vs_compile_args *args)
3833 {
3834     const struct shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
3835     const struct wined3d_gl_info *gl_info = context->gl_info;
3836     CONST DWORD *function = This->baseShader.function;
3837     struct shader_glsl_ctx_priv priv_ctx;
3838
3839     /* Create the hw GLSL shader program and assign it as the shader->prgId */
3840     GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
3841
3842     shader_addline(buffer, "#version 120\n");
3843
3844     memset(&priv_ctx, 0, sizeof(priv_ctx));
3845     priv_ctx.cur_vs_args = args;
3846
3847     /* Base Declarations */
3848     shader_generate_glsl_declarations(context, buffer, (IWineD3DBaseShader *)This, reg_maps, &priv_ctx);
3849
3850     /* Base Shader Body */
3851     shader_generate_main((IWineD3DBaseShader*)This, buffer, reg_maps, function, &priv_ctx);
3852
3853     /* Unpack 3.0 outputs */
3854     if (reg_maps->shader_version.major >= 3) shader_addline(buffer, "order_ps_input(OUT);\n");
3855     else shader_addline(buffer, "order_ps_input();\n");
3856
3857     /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
3858      * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
3859      * the fog frag coord is thrown away. If the fog frag coord is used, but not written by
3860      * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
3861      */
3862     if(args->fog_src == VS_FOG_Z) {
3863         shader_addline(buffer, "gl_FogFragCoord = gl_Position.z;\n");
3864     } else if (!reg_maps->fog) {
3865         shader_addline(buffer, "gl_FogFragCoord = 0.0;\n");
3866     }
3867
3868     /* Write the final position.
3869      *
3870      * OpenGL coordinates specify the center of the pixel while d3d coords specify
3871      * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
3872      * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
3873      * contains 1.0 to allow a mad.
3874      */
3875     shader_addline(buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n");
3876     shader_addline(buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n");
3877     shader_addline(buffer, "gl_ClipVertex = gl_Position;\n");
3878
3879     /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
3880      *
3881      * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However, shaders are run
3882      * before the homogeneous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
3883      * which is the same as z = z * 2 - w.
3884      */
3885     shader_addline(buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
3886
3887     shader_addline(buffer, "}\n");
3888
3889     TRACE("Compiling shader object %u\n", shader_obj);
3890     GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer->buffer, NULL));
3891     GL_EXTCALL(glCompileShaderARB(shader_obj));
3892     print_glsl_info_log(gl_info, shader_obj);
3893
3894     return shader_obj;
3895 }
3896
3897 static GLhandleARB find_glsl_pshader(const struct wined3d_context *context,
3898         struct wined3d_shader_buffer *buffer, IWineD3DPixelShaderImpl *shader,
3899         const struct ps_compile_args *args, const struct ps_np2fixup_info **np2fixup_info)
3900 {
3901     UINT i;
3902     DWORD new_size;
3903     struct glsl_ps_compiled_shader *new_array;
3904     struct glsl_pshader_private    *shader_data;
3905     struct ps_np2fixup_info        *np2fixup = NULL;
3906     GLhandleARB ret;
3907
3908     if (!shader->baseShader.backend_data)
3909     {
3910         shader->baseShader.backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
3911         if (!shader->baseShader.backend_data)
3912         {
3913             ERR("Failed to allocate backend data.\n");
3914             return 0;
3915         }
3916     }
3917     shader_data = shader->baseShader.backend_data;
3918
3919     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
3920      * so a linear search is more performant than a hashmap or a binary search
3921      * (cache coherency etc)
3922      */
3923     for(i = 0; i < shader_data->num_gl_shaders; i++) {
3924         if(memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args)) == 0) {
3925             if(args->np2_fixup) *np2fixup_info = &shader_data->gl_shaders[i].np2fixup;
3926             return shader_data->gl_shaders[i].prgId;
3927         }
3928     }
3929
3930     TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
3931     if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
3932         if (shader_data->num_gl_shaders)
3933         {
3934             new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
3935             new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders,
3936                                     new_size * sizeof(*shader_data->gl_shaders));
3937         } else {
3938             new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader_data->gl_shaders));
3939             new_size = 1;
3940         }
3941
3942         if(!new_array) {
3943             ERR("Out of memory\n");
3944             return 0;
3945         }
3946         shader_data->gl_shaders = new_array;
3947         shader_data->shader_array_size = new_size;
3948     }
3949
3950     shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
3951
3952     memset(&shader_data->gl_shaders[shader_data->num_gl_shaders].np2fixup, 0, sizeof(struct ps_np2fixup_info));
3953     if (args->np2_fixup) np2fixup = &shader_data->gl_shaders[shader_data->num_gl_shaders].np2fixup;
3954
3955     pixelshader_update_samplers(&shader->baseShader.reg_maps,
3956             ((IWineD3DDeviceImpl *)shader->baseShader.device)->stateBlock->textures);
3957
3958     shader_buffer_clear(buffer);
3959     ret = shader_glsl_generate_pshader(context, buffer, shader, args, np2fixup);
3960     shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
3961     *np2fixup_info = np2fixup;
3962
3963     return ret;
3964 }
3965
3966 static inline BOOL vs_args_equal(const struct vs_compile_args *stored, const struct vs_compile_args *new,
3967                                  const DWORD use_map) {
3968     if((stored->swizzle_map & use_map) != new->swizzle_map) return FALSE;
3969     return stored->fog_src == new->fog_src;
3970 }
3971
3972 static GLhandleARB find_glsl_vshader(const struct wined3d_context *context,
3973         struct wined3d_shader_buffer *buffer, IWineD3DVertexShaderImpl *shader,
3974         const struct vs_compile_args *args)
3975 {
3976     UINT i;
3977     DWORD new_size;
3978     struct glsl_vs_compiled_shader *new_array;
3979     DWORD use_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.use_map;
3980     struct glsl_vshader_private *shader_data;
3981     GLhandleARB ret;
3982
3983     if (!shader->baseShader.backend_data)
3984     {
3985         shader->baseShader.backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
3986         if (!shader->baseShader.backend_data)
3987         {
3988             ERR("Failed to allocate backend data.\n");
3989             return 0;
3990         }
3991     }
3992     shader_data = shader->baseShader.backend_data;
3993
3994     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
3995      * so a linear search is more performant than a hashmap or a binary search
3996      * (cache coherency etc)
3997      */
3998     for(i = 0; i < shader_data->num_gl_shaders; i++) {
3999         if(vs_args_equal(&shader_data->gl_shaders[i].args, args, use_map)) {
4000             return shader_data->gl_shaders[i].prgId;
4001         }
4002     }
4003
4004     TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
4005
4006     if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
4007         if (shader_data->num_gl_shaders)
4008         {
4009             new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4010             new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders,
4011                                     new_size * sizeof(*shader_data->gl_shaders));
4012         } else {
4013             new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader_data->gl_shaders));
4014             new_size = 1;
4015         }
4016
4017         if(!new_array) {
4018             ERR("Out of memory\n");
4019             return 0;
4020         }
4021         shader_data->gl_shaders = new_array;
4022         shader_data->shader_array_size = new_size;
4023     }
4024
4025     shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
4026
4027     shader_buffer_clear(buffer);
4028     ret = shader_glsl_generate_vshader(context, buffer, shader, args);
4029     shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
4030
4031     return ret;
4032 }
4033
4034 /** Sets the GLSL program ID for the given pixel and vertex shader combination.
4035  * It sets the programId on the current StateBlock (because it should be called
4036  * inside of the DrawPrimitive() part of the render loop).
4037  *
4038  * If a program for the given combination does not exist, create one, and store
4039  * the program in the hash table.  If it creates a program, it will link the
4040  * given objects, too.
4041  */
4042
4043 /* GL locking is done by the caller */
4044 static void set_glsl_shader_program(const struct wined3d_context *context,
4045         IWineD3DDeviceImpl *device, BOOL use_ps, BOOL use_vs)
4046 {
4047     IWineD3DVertexShader *vshader = use_vs ? device->stateBlock->vertexShader : NULL;
4048     IWineD3DPixelShader *pshader = use_ps ? device->stateBlock->pixelShader : NULL;
4049     const struct wined3d_gl_info *gl_info = context->gl_info;
4050     struct shader_glsl_priv *priv = device->shader_priv;
4051     struct glsl_shader_prog_link *entry    = NULL;
4052     GLhandleARB programId                  = 0;
4053     GLhandleARB reorder_shader_id          = 0;
4054     unsigned int i;
4055     char glsl_name[8];
4056     struct ps_compile_args ps_compile_args;
4057     struct vs_compile_args vs_compile_args;
4058
4059     if (vshader) find_vs_compile_args((IWineD3DVertexShaderImpl *)vshader, device->stateBlock, &vs_compile_args);
4060     if (pshader) find_ps_compile_args((IWineD3DPixelShaderImpl *)pshader, device->stateBlock, &ps_compile_args);
4061
4062     entry = get_glsl_program_entry(priv, vshader, pshader, &vs_compile_args, &ps_compile_args);
4063     if (entry) {
4064         priv->glsl_program = entry;
4065         return;
4066     }
4067
4068     /* If we get to this point, then no matching program exists, so we create one */
4069     programId = GL_EXTCALL(glCreateProgramObjectARB());
4070     TRACE("Created new GLSL shader program %u\n", programId);
4071
4072     /* Create the entry */
4073     entry = HeapAlloc(GetProcessHeap(), 0, sizeof(struct glsl_shader_prog_link));
4074     entry->programId = programId;
4075     entry->vshader = vshader;
4076     entry->pshader = pshader;
4077     entry->vs_args = vs_compile_args;
4078     entry->ps_args = ps_compile_args;
4079     entry->constant_version = 0;
4080     entry->np2Fixup_info = NULL;
4081     /* Add the hash table entry */
4082     add_glsl_program_entry(priv, entry);
4083
4084     /* Set the current program */
4085     priv->glsl_program = entry;
4086
4087     /* Attach GLSL vshader */
4088     if (vshader)
4089     {
4090         GLhandleARB vshader_id = find_glsl_vshader(context, &priv->shader_buffer,
4091                 (IWineD3DVertexShaderImpl *)vshader, &vs_compile_args);
4092         WORD map = ((IWineD3DBaseShaderImpl *)vshader)->baseShader.reg_maps.input_registers;
4093         char tmp_name[10];
4094
4095         reorder_shader_id = generate_param_reorder_function(&priv->shader_buffer, vshader, pshader, gl_info);
4096         TRACE("Attaching GLSL shader object %u to program %u\n", reorder_shader_id, programId);
4097         GL_EXTCALL(glAttachObjectARB(programId, reorder_shader_id));
4098         checkGLcall("glAttachObjectARB");
4099         /* Flag the reorder function for deletion, then it will be freed automatically when the program
4100          * is destroyed
4101          */
4102         GL_EXTCALL(glDeleteObjectARB(reorder_shader_id));
4103
4104         TRACE("Attaching GLSL shader object %u to program %u\n", vshader_id, programId);
4105         GL_EXTCALL(glAttachObjectARB(programId, vshader_id));
4106         checkGLcall("glAttachObjectARB");
4107
4108         /* Bind vertex attributes to a corresponding index number to match
4109          * the same index numbers as ARB_vertex_programs (makes loading
4110          * vertex attributes simpler).  With this method, we can use the
4111          * exact same code to load the attributes later for both ARB and
4112          * GLSL shaders.
4113          *
4114          * We have to do this here because we need to know the Program ID
4115          * in order to make the bindings work, and it has to be done prior
4116          * to linking the GLSL program. */
4117         for (i = 0; map; map >>= 1, ++i)
4118         {
4119             if (!(map & 1)) continue;
4120
4121             snprintf(tmp_name, sizeof(tmp_name), "attrib%u", i);
4122             GL_EXTCALL(glBindAttribLocationARB(programId, i, tmp_name));
4123         }
4124         checkGLcall("glBindAttribLocationARB");
4125
4126         list_add_head(&((IWineD3DBaseShaderImpl *)vshader)->baseShader.linked_programs, &entry->vshader_entry);
4127     }
4128
4129     /* Attach GLSL pshader */
4130     if (pshader)
4131     {
4132         GLhandleARB pshader_id = find_glsl_pshader(context, &priv->shader_buffer,
4133                 (IWineD3DPixelShaderImpl *)pshader, &ps_compile_args, &entry->np2Fixup_info);
4134         TRACE("Attaching GLSL shader object %u to program %u\n", pshader_id, programId);
4135         GL_EXTCALL(glAttachObjectARB(programId, pshader_id));
4136         checkGLcall("glAttachObjectARB");
4137
4138         list_add_head(&((IWineD3DBaseShaderImpl *)pshader)->baseShader.linked_programs, &entry->pshader_entry);
4139     }
4140
4141     /* Link the program */
4142     TRACE("Linking GLSL shader program %u\n", programId);
4143     GL_EXTCALL(glLinkProgramARB(programId));
4144     print_glsl_info_log(gl_info, programId);
4145
4146     entry->vuniformF_locations = HeapAlloc(GetProcessHeap(), 0,
4147             sizeof(GLhandleARB) * gl_info->limits.glsl_vs_float_constants);
4148     for (i = 0; i < gl_info->limits.glsl_vs_float_constants; ++i)
4149     {
4150         snprintf(glsl_name, sizeof(glsl_name), "VC[%i]", i);
4151         entry->vuniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4152     }
4153     for (i = 0; i < MAX_CONST_I; ++i)
4154     {
4155         snprintf(glsl_name, sizeof(glsl_name), "VI[%i]", i);
4156         entry->vuniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4157     }
4158     entry->puniformF_locations = HeapAlloc(GetProcessHeap(), 0,
4159             sizeof(GLhandleARB) * gl_info->limits.glsl_ps_float_constants);
4160     for (i = 0; i < gl_info->limits.glsl_ps_float_constants; ++i)
4161     {
4162         snprintf(glsl_name, sizeof(glsl_name), "PC[%i]", i);
4163         entry->puniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4164     }
4165     for (i = 0; i < MAX_CONST_I; ++i)
4166     {
4167         snprintf(glsl_name, sizeof(glsl_name), "PI[%i]", i);
4168         entry->puniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4169     }
4170
4171     if(pshader) {
4172         char name[32];
4173
4174         for(i = 0; i < MAX_TEXTURES; i++) {
4175             sprintf(name, "bumpenvmat%u", i);
4176             entry->bumpenvmat_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
4177             sprintf(name, "luminancescale%u", i);
4178             entry->luminancescale_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
4179             sprintf(name, "luminanceoffset%u", i);
4180             entry->luminanceoffset_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
4181         }
4182
4183         if (ps_compile_args.np2_fixup) {
4184             if (entry->np2Fixup_info) {
4185                 entry->np2Fixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "PsamplerNP2Fixup"));
4186             } else {
4187                 FIXME("NP2 texcoord fixup needed for this pixelshader, but no fixup uniform found.\n");
4188             }
4189         }
4190     }
4191
4192     entry->posFixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup"));
4193     entry->ycorrection_location = GL_EXTCALL(glGetUniformLocationARB(programId, "ycorrection"));
4194     checkGLcall("Find glsl program uniform locations");
4195
4196     if (pshader
4197             && ((IWineD3DPixelShaderImpl *)pshader)->baseShader.reg_maps.shader_version.major >= 3
4198             && ((IWineD3DPixelShaderImpl *)pshader)->declared_in_count > vec4_varyings(3, gl_info))
4199     {
4200         TRACE("Shader %d needs vertex color clamping disabled\n", programId);
4201         entry->vertex_color_clamp = GL_FALSE;
4202     } else {
4203         entry->vertex_color_clamp = GL_FIXED_ONLY_ARB;
4204     }
4205
4206     /* Set the shader to allow uniform loading on it */
4207     GL_EXTCALL(glUseProgramObjectARB(programId));
4208     checkGLcall("glUseProgramObjectARB(programId)");
4209
4210     /* Load the vertex and pixel samplers now. The function that finds the mappings makes sure
4211      * that it stays the same for each vertexshader-pixelshader pair(=linked glsl program). If
4212      * a pshader with fixed function pipeline is used there are no vertex samplers, and if a
4213      * vertex shader with fixed function pixel processing is used we make sure that the card
4214      * supports enough samplers to allow the max number of vertex samplers with all possible
4215      * fixed function fragment processing setups. So once the program is linked these samplers
4216      * won't change.
4217      */
4218     if (vshader) shader_glsl_load_vsamplers(gl_info, device->texUnitMap, programId);
4219     if (pshader) shader_glsl_load_psamplers(gl_info, device->texUnitMap, programId);
4220
4221     /* If the local constants do not have to be loaded with the environment constants,
4222      * load them now to have them hardcoded in the GLSL program. This saves some CPU cycles
4223      * later
4224      */
4225     if(pshader && !((IWineD3DPixelShaderImpl*)pshader)->baseShader.load_local_constsF) {
4226         hardcode_local_constants((IWineD3DBaseShaderImpl *) pshader, gl_info, programId, 'P');
4227     }
4228     if(vshader && !((IWineD3DVertexShaderImpl*)vshader)->baseShader.load_local_constsF) {
4229         hardcode_local_constants((IWineD3DBaseShaderImpl *) vshader, gl_info, programId, 'V');
4230     }
4231 }
4232
4233 /* GL locking is done by the caller */
4234 static GLhandleARB create_glsl_blt_shader(const struct wined3d_gl_info *gl_info, enum tex_types tex_type)
4235 {
4236     GLhandleARB program_id;
4237     GLhandleARB vshader_id, pshader_id;
4238     static const char *blt_vshader[] =
4239     {
4240         "#version 120\n"
4241         "void main(void)\n"
4242         "{\n"
4243         "    gl_Position = gl_Vertex;\n"
4244         "    gl_FrontColor = vec4(1.0);\n"
4245         "    gl_TexCoord[0] = gl_MultiTexCoord0;\n"
4246         "}\n"
4247     };
4248
4249     static const char *blt_pshaders[tex_type_count] =
4250     {
4251         /* tex_1d */
4252         NULL,
4253         /* tex_2d */
4254         "#version 120\n"
4255         "uniform sampler2D sampler;\n"
4256         "void main(void)\n"
4257         "{\n"
4258         "    gl_FragDepth = texture2D(sampler, gl_TexCoord[0].xy).x;\n"
4259         "}\n",
4260         /* tex_3d */
4261         NULL,
4262         /* tex_cube */
4263         "#version 120\n"
4264         "uniform samplerCube sampler;\n"
4265         "void main(void)\n"
4266         "{\n"
4267         "    gl_FragDepth = textureCube(sampler, gl_TexCoord[0].xyz).x;\n"
4268         "}\n",
4269         /* tex_rect */
4270         "#version 120\n"
4271         "#extension GL_ARB_texture_rectangle : enable\n"
4272         "uniform sampler2DRect sampler;\n"
4273         "void main(void)\n"
4274         "{\n"
4275         "    gl_FragDepth = texture2DRect(sampler, gl_TexCoord[0].xy).x;\n"
4276         "}\n",
4277     };
4278
4279     if (!blt_pshaders[tex_type])
4280     {
4281         FIXME("tex_type %#x not supported\n", tex_type);
4282         tex_type = tex_2d;
4283     }
4284
4285     vshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
4286     GL_EXTCALL(glShaderSourceARB(vshader_id, 1, blt_vshader, NULL));
4287     GL_EXTCALL(glCompileShaderARB(vshader_id));
4288
4289     pshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
4290     GL_EXTCALL(glShaderSourceARB(pshader_id, 1, &blt_pshaders[tex_type], NULL));
4291     GL_EXTCALL(glCompileShaderARB(pshader_id));
4292
4293     program_id = GL_EXTCALL(glCreateProgramObjectARB());
4294     GL_EXTCALL(glAttachObjectARB(program_id, vshader_id));
4295     GL_EXTCALL(glAttachObjectARB(program_id, pshader_id));
4296     GL_EXTCALL(glLinkProgramARB(program_id));
4297
4298     print_glsl_info_log(gl_info, program_id);
4299
4300     /* Once linked we can mark the shaders for deletion. They will be deleted once the program
4301      * is destroyed
4302      */
4303     GL_EXTCALL(glDeleteObjectARB(vshader_id));
4304     GL_EXTCALL(glDeleteObjectARB(pshader_id));
4305     return program_id;
4306 }
4307
4308 /* GL locking is done by the caller */
4309 static void shader_glsl_select(const struct wined3d_context *context, BOOL usePS, BOOL useVS)
4310 {
4311     IWineD3DDeviceImpl *device = ((IWineD3DSurfaceImpl *)context->surface)->resource.wineD3DDevice;
4312     const struct wined3d_gl_info *gl_info = context->gl_info;
4313     struct shader_glsl_priv *priv = device->shader_priv;
4314     GLhandleARB program_id = 0;
4315     GLenum old_vertex_color_clamp, current_vertex_color_clamp;
4316
4317     old_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
4318
4319     if (useVS || usePS) set_glsl_shader_program(context, device, usePS, useVS);
4320     else priv->glsl_program = NULL;
4321
4322     current_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
4323
4324     if (old_vertex_color_clamp != current_vertex_color_clamp)
4325     {
4326         if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT])
4327         {
4328             GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, current_vertex_color_clamp));
4329             checkGLcall("glClampColorARB");
4330         }
4331         else
4332         {
4333             FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
4334         }
4335     }
4336
4337     program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
4338     if (program_id) TRACE("Using GLSL program %u\n", program_id);
4339     GL_EXTCALL(glUseProgramObjectARB(program_id));
4340     checkGLcall("glUseProgramObjectARB");
4341
4342     /* In case that NP2 texcoord fixup data is found for the selected program, trigger a reload of the
4343      * constants. This has to be done because it can't be guaranteed that sampler() (from state.c) is
4344      * called between selecting the shader and using it, which results in wrong fixup for some frames. */
4345     if (priv->glsl_program && priv->glsl_program->np2Fixup_info)
4346     {
4347         shader_glsl_load_np2fixup_constants((IWineD3DDevice *)device, usePS, useVS);
4348     }
4349 }
4350
4351 /* GL locking is done by the caller */
4352 static void shader_glsl_select_depth_blt(IWineD3DDevice *iface, enum tex_types tex_type) {
4353     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
4354     const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
4355     struct shader_glsl_priv *priv = This->shader_priv;
4356     GLhandleARB *blt_program = &priv->depth_blt_program[tex_type];
4357
4358     if (!*blt_program) {
4359         GLint loc;
4360         *blt_program = create_glsl_blt_shader(gl_info, tex_type);
4361         loc = GL_EXTCALL(glGetUniformLocationARB(*blt_program, "sampler"));
4362         GL_EXTCALL(glUseProgramObjectARB(*blt_program));
4363         GL_EXTCALL(glUniform1iARB(loc, 0));
4364     } else {
4365         GL_EXTCALL(glUseProgramObjectARB(*blt_program));
4366     }
4367 }
4368
4369 /* GL locking is done by the caller */
4370 static void shader_glsl_deselect_depth_blt(IWineD3DDevice *iface) {
4371     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
4372     const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
4373     struct shader_glsl_priv *priv = This->shader_priv;
4374     GLhandleARB program_id;
4375
4376     program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
4377     if (program_id) TRACE("Using GLSL program %u\n", program_id);
4378
4379     GL_EXTCALL(glUseProgramObjectARB(program_id));
4380     checkGLcall("glUseProgramObjectARB");
4381 }
4382
4383 static void shader_glsl_destroy(IWineD3DBaseShader *iface) {
4384     const struct list *linked_programs;
4385     IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *) iface;
4386     IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)This->baseShader.device;
4387     struct shader_glsl_priv *priv = device->shader_priv;
4388     const struct wined3d_gl_info *gl_info;
4389     IWineD3DPixelShaderImpl *ps = NULL;
4390     IWineD3DVertexShaderImpl *vs = NULL;
4391     struct wined3d_context *context;
4392
4393     /* Note: Do not use QueryInterface here to find out which shader type this is because this code
4394      * can be called from IWineD3DBaseShader::Release
4395      */
4396     char pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type);
4397
4398     if(pshader) {
4399         struct glsl_pshader_private *shader_data;
4400         ps = (IWineD3DPixelShaderImpl *) This;
4401         shader_data = ps->baseShader.backend_data;
4402         if(!shader_data || shader_data->num_gl_shaders == 0)
4403         {
4404             HeapFree(GetProcessHeap(), 0, shader_data);
4405             ps->baseShader.backend_data = NULL;
4406             return;
4407         }
4408
4409         context = context_acquire(device, NULL, CTXUSAGE_RESOURCELOAD);
4410         gl_info = context->gl_info;
4411
4412         if (priv->glsl_program && (IWineD3DBaseShader *)priv->glsl_program->pshader == iface)
4413         {
4414             ENTER_GL();
4415             shader_glsl_select(context, FALSE, FALSE);
4416             LEAVE_GL();
4417         }
4418     } else {
4419         struct glsl_vshader_private *shader_data;
4420         vs = (IWineD3DVertexShaderImpl *) This;
4421         shader_data = vs->baseShader.backend_data;
4422         if(!shader_data || shader_data->num_gl_shaders == 0)
4423         {
4424             HeapFree(GetProcessHeap(), 0, shader_data);
4425             vs->baseShader.backend_data = NULL;
4426             return;
4427         }
4428
4429         context = context_acquire(device, NULL, CTXUSAGE_RESOURCELOAD);
4430         gl_info = context->gl_info;
4431
4432         if (priv->glsl_program && (IWineD3DBaseShader *)priv->glsl_program->vshader == iface)
4433         {
4434             ENTER_GL();
4435             shader_glsl_select(context, FALSE, FALSE);
4436             LEAVE_GL();
4437         }
4438     }
4439
4440     linked_programs = &This->baseShader.linked_programs;
4441
4442     TRACE("Deleting linked programs\n");
4443     if (linked_programs->next) {
4444         struct glsl_shader_prog_link *entry, *entry2;
4445
4446         ENTER_GL();
4447         if(pshader) {
4448             LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, pshader_entry) {
4449                 delete_glsl_program_entry(priv, gl_info, entry);
4450             }
4451         } else {
4452             LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
4453                 delete_glsl_program_entry(priv, gl_info, entry);
4454             }
4455         }
4456         LEAVE_GL();
4457     }
4458
4459     if(pshader) {
4460         UINT i;
4461         struct glsl_pshader_private *shader_data = ps->baseShader.backend_data;
4462
4463         ENTER_GL();
4464         for(i = 0; i < shader_data->num_gl_shaders; i++) {
4465             TRACE("deleting pshader %u\n", shader_data->gl_shaders[i].prgId);
4466             GL_EXTCALL(glDeleteObjectARB(shader_data->gl_shaders[i].prgId));
4467             checkGLcall("glDeleteObjectARB");
4468         }
4469         LEAVE_GL();
4470         HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
4471         HeapFree(GetProcessHeap(), 0, shader_data);
4472         ps->baseShader.backend_data = NULL;
4473     } else {
4474         UINT i;
4475         struct glsl_vshader_private *shader_data = vs->baseShader.backend_data;
4476
4477         ENTER_GL();
4478         for(i = 0; i < shader_data->num_gl_shaders; i++) {
4479             TRACE("deleting vshader %u\n", shader_data->gl_shaders[i].prgId);
4480             GL_EXTCALL(glDeleteObjectARB(shader_data->gl_shaders[i].prgId));
4481             checkGLcall("glDeleteObjectARB");
4482         }
4483         LEAVE_GL();
4484         HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
4485         HeapFree(GetProcessHeap(), 0, shader_data);
4486         vs->baseShader.backend_data = NULL;
4487     }
4488
4489     context_release(context);
4490 }
4491
4492 static int glsl_program_key_compare(const void *key, const struct wine_rb_entry *entry)
4493 {
4494     const glsl_program_key_t *k = key;
4495     const struct glsl_shader_prog_link *prog = WINE_RB_ENTRY_VALUE(entry,
4496             const struct glsl_shader_prog_link, program_lookup_entry);
4497     int cmp;
4498
4499     if (k->vshader > prog->vshader) return 1;
4500     else if (k->vshader < prog->vshader) return -1;
4501
4502     if (k->pshader > prog->pshader) return 1;
4503     else if (k->pshader < prog->pshader) return -1;
4504
4505     if (k->vshader && (cmp = memcmp(&k->vs_args, &prog->vs_args, sizeof(prog->vs_args)))) return cmp;
4506     if (k->pshader && (cmp = memcmp(&k->ps_args, &prog->ps_args, sizeof(prog->ps_args)))) return cmp;
4507
4508     return 0;
4509 }
4510
4511 static BOOL constant_heap_init(struct constant_heap *heap, unsigned int constant_count)
4512 {
4513     SIZE_T size = (constant_count + 1) * sizeof(*heap->entries) + constant_count * sizeof(*heap->positions);
4514     void *mem = HeapAlloc(GetProcessHeap(), 0, size);
4515
4516     if (!mem)
4517     {
4518         ERR("Failed to allocate memory\n");
4519         return FALSE;
4520     }
4521
4522     heap->entries = mem;
4523     heap->entries[1].version = 0;
4524     heap->positions = (unsigned int *)(heap->entries + constant_count + 1);
4525     heap->size = 1;
4526
4527     return TRUE;
4528 }
4529
4530 static void constant_heap_free(struct constant_heap *heap)
4531 {
4532     HeapFree(GetProcessHeap(), 0, heap->entries);
4533 }
4534
4535 static const struct wine_rb_functions wined3d_glsl_program_rb_functions =
4536 {
4537     wined3d_rb_alloc,
4538     wined3d_rb_realloc,
4539     wined3d_rb_free,
4540     glsl_program_key_compare,
4541 };
4542
4543 static HRESULT shader_glsl_alloc(IWineD3DDevice *iface) {
4544     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
4545     const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
4546     struct shader_glsl_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_glsl_priv));
4547     SIZE_T stack_size = wined3d_log2i(max(gl_info->limits.glsl_vs_float_constants,
4548             gl_info->limits.glsl_ps_float_constants)) + 1;
4549
4550     if (!shader_buffer_init(&priv->shader_buffer))
4551     {
4552         ERR("Failed to initialize shader buffer.\n");
4553         goto fail;
4554     }
4555
4556     priv->stack = HeapAlloc(GetProcessHeap(), 0, stack_size * sizeof(*priv->stack));
4557     if (!priv->stack)
4558     {
4559         ERR("Failed to allocate memory.\n");
4560         goto fail;
4561     }
4562
4563     if (!constant_heap_init(&priv->vconst_heap, gl_info->limits.glsl_vs_float_constants))
4564     {
4565         ERR("Failed to initialize vertex shader constant heap\n");
4566         goto fail;
4567     }
4568
4569     if (!constant_heap_init(&priv->pconst_heap, gl_info->limits.glsl_ps_float_constants))
4570     {
4571         ERR("Failed to initialize pixel shader constant heap\n");
4572         goto fail;
4573     }
4574
4575     if (wine_rb_init(&priv->program_lookup, &wined3d_glsl_program_rb_functions) == -1)
4576     {
4577         ERR("Failed to initialize rbtree.\n");
4578         goto fail;
4579     }
4580
4581     priv->next_constant_version = 1;
4582
4583     This->shader_priv = priv;
4584     return WINED3D_OK;
4585
4586 fail:
4587     constant_heap_free(&priv->pconst_heap);
4588     constant_heap_free(&priv->vconst_heap);
4589     HeapFree(GetProcessHeap(), 0, priv->stack);
4590     shader_buffer_free(&priv->shader_buffer);
4591     HeapFree(GetProcessHeap(), 0, priv);
4592     return E_OUTOFMEMORY;
4593 }
4594
4595 /* Context activation is done by the caller. */
4596 static void shader_glsl_free(IWineD3DDevice *iface) {
4597     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
4598     const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
4599     struct shader_glsl_priv *priv = This->shader_priv;
4600     int i;
4601
4602     ENTER_GL();
4603     for (i = 0; i < tex_type_count; ++i)
4604     {
4605         if (priv->depth_blt_program[i])
4606         {
4607             GL_EXTCALL(glDeleteObjectARB(priv->depth_blt_program[i]));
4608         }
4609     }
4610     LEAVE_GL();
4611
4612     wine_rb_destroy(&priv->program_lookup, NULL, NULL);
4613     constant_heap_free(&priv->pconst_heap);
4614     constant_heap_free(&priv->vconst_heap);
4615     HeapFree(GetProcessHeap(), 0, priv->stack);
4616     shader_buffer_free(&priv->shader_buffer);
4617
4618     HeapFree(GetProcessHeap(), 0, This->shader_priv);
4619     This->shader_priv = NULL;
4620 }
4621
4622 static BOOL shader_glsl_dirty_const(IWineD3DDevice *iface) {
4623     /* TODO: GL_EXT_bindable_uniform can be used to share constants across shaders */
4624     return FALSE;
4625 }
4626
4627 static void shader_glsl_get_caps(WINED3DDEVTYPE devtype,
4628         const struct wined3d_gl_info *gl_info, struct shader_caps *pCaps)
4629 {
4630     /* Nvidia Geforce6/7 or Ati R4xx/R5xx cards with GLSL support, support VS 3.0 but older Nvidia/Ati
4631      * models with GLSL support only support 2.0. In case of nvidia we can detect VS 2.0 support based
4632      * on the version of NV_vertex_program.
4633      * For Ati cards there's no way using glsl (it abstracts the lowlevel info away) and also not
4634      * using ARB_vertex_program. It is safe to assume that when a card supports pixel shader 2.0 it
4635      * supports vertex shader 2.0 too and the way around. We can detect ps2.0 using the maximum number
4636      * of native instructions, so use that here. For more info see the pixel shader versioning code below.
4637      */
4638     if ((gl_info->supported[NV_VERTEX_PROGRAM2] && !gl_info->supported[NV_VERTEX_PROGRAM3])
4639             || gl_info->limits.arb_ps_instructions <= 512)
4640         pCaps->VertexShaderVersion = WINED3DVS_VERSION(2,0);
4641     else
4642         pCaps->VertexShaderVersion = WINED3DVS_VERSION(3,0);
4643     TRACE_(d3d_caps)("Hardware vertex shader version %d.%d enabled (GLSL)\n", (pCaps->VertexShaderVersion >> 8) & 0xff, pCaps->VertexShaderVersion & 0xff);
4644     pCaps->MaxVertexShaderConst = gl_info->limits.glsl_vs_float_constants;
4645
4646     /* Older DX9-class videocards (GeforceFX / Radeon >9500/X*00) only support pixel shader 2.0/2.0a/2.0b.
4647      * In OpenGL the extensions related to GLSL abstract lowlevel GL info away which is needed
4648      * to distinguish between 2.0 and 3.0 (and 2.0a/2.0b). In case of Nvidia we use their fragment
4649      * program extensions. On other hardware including ATI GL_ARB_fragment_program offers the info
4650      * in max native instructions. Intel and others also offer the info in this extension but they
4651      * don't support GLSL (at least on Windows).
4652      *
4653      * PS2.0 requires at least 96 instructions, 2.0a/2.0b go up to 512. Assume that if the number
4654      * of instructions is 512 or less we have to do with ps2.0 hardware.
4655      * NOTE: ps3.0 hardware requires 512 or more instructions but ati and nvidia offer 'enough' (1024 vs 4096) on their most basic ps3.0 hardware.
4656      */
4657     if ((gl_info->supported[NV_FRAGMENT_PROGRAM] && !gl_info->supported[NV_FRAGMENT_PROGRAM2])
4658             || gl_info->limits.arb_ps_instructions <= 512)
4659         pCaps->PixelShaderVersion = WINED3DPS_VERSION(2,0);
4660     else
4661         pCaps->PixelShaderVersion = WINED3DPS_VERSION(3,0);
4662
4663     pCaps->MaxPixelShaderConst = gl_info->limits.glsl_ps_float_constants;
4664
4665     /* FIXME: The following line is card dependent. -8.0 to 8.0 is the
4666      * Direct3D minimum requirement.
4667      *
4668      * Both GL_ARB_fragment_program and GLSL require a "maximum representable magnitude"
4669      * of colors to be 2^10, and 2^32 for other floats. Should we use 1024 here?
4670      *
4671      * The problem is that the refrast clamps temporary results in the shader to
4672      * [-MaxValue;+MaxValue]. If the card's max value is bigger than the one we advertize here,
4673      * then applications may miss the clamping behavior. On the other hand, if it is smaller,
4674      * the shader will generate incorrect results too. Unfortunately, GL deliberately doesn't
4675      * offer a way to query this.
4676      */
4677     pCaps->PixelShader1xMaxValue = 8.0;
4678     TRACE_(d3d_caps)("Hardware pixel shader version %d.%d enabled (GLSL)\n", (pCaps->PixelShaderVersion >> 8) & 0xff, pCaps->PixelShaderVersion & 0xff);
4679
4680     pCaps->VSClipping = TRUE;
4681 }
4682
4683 static BOOL shader_glsl_color_fixup_supported(struct color_fixup_desc fixup)
4684 {
4685     if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
4686     {
4687         TRACE("Checking support for fixup:\n");
4688         dump_color_fixup_desc(fixup);
4689     }
4690
4691     /* We support everything except YUV conversions. */
4692     if (!is_yuv_fixup(fixup))
4693     {
4694         TRACE("[OK]\n");
4695         return TRUE;
4696     }
4697
4698     TRACE("[FAILED]\n");
4699     return FALSE;
4700 }
4701
4702 static const SHADER_HANDLER shader_glsl_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
4703 {
4704     /* WINED3DSIH_ABS           */ shader_glsl_map2gl,
4705     /* WINED3DSIH_ADD           */ shader_glsl_arith,
4706     /* WINED3DSIH_BEM           */ shader_glsl_bem,
4707     /* WINED3DSIH_BREAK         */ shader_glsl_break,
4708     /* WINED3DSIH_BREAKC        */ shader_glsl_breakc,
4709     /* WINED3DSIH_BREAKP        */ NULL,
4710     /* WINED3DSIH_CALL          */ shader_glsl_call,
4711     /* WINED3DSIH_CALLNZ        */ shader_glsl_callnz,
4712     /* WINED3DSIH_CMP           */ shader_glsl_cmp,
4713     /* WINED3DSIH_CND           */ shader_glsl_cnd,
4714     /* WINED3DSIH_CRS           */ shader_glsl_cross,
4715     /* WINED3DSIH_DCL           */ NULL,
4716     /* WINED3DSIH_DEF           */ NULL,
4717     /* WINED3DSIH_DEFB          */ NULL,
4718     /* WINED3DSIH_DEFI          */ NULL,
4719     /* WINED3DSIH_DP2ADD        */ shader_glsl_dp2add,
4720     /* WINED3DSIH_DP3           */ shader_glsl_dot,
4721     /* WINED3DSIH_DP4           */ shader_glsl_dot,
4722     /* WINED3DSIH_DST           */ shader_glsl_dst,
4723     /* WINED3DSIH_DSX           */ shader_glsl_map2gl,
4724     /* WINED3DSIH_DSY           */ shader_glsl_map2gl,
4725     /* WINED3DSIH_ELSE          */ shader_glsl_else,
4726     /* WINED3DSIH_ENDIF         */ shader_glsl_end,
4727     /* WINED3DSIH_ENDLOOP       */ shader_glsl_end,
4728     /* WINED3DSIH_ENDREP        */ shader_glsl_end,
4729     /* WINED3DSIH_EXP           */ shader_glsl_map2gl,
4730     /* WINED3DSIH_EXPP          */ shader_glsl_expp,
4731     /* WINED3DSIH_FRC           */ shader_glsl_map2gl,
4732     /* WINED3DSIH_IF            */ shader_glsl_if,
4733     /* WINED3DSIH_IFC           */ shader_glsl_ifc,
4734     /* WINED3DSIH_LABEL         */ shader_glsl_label,
4735     /* WINED3DSIH_LIT           */ shader_glsl_lit,
4736     /* WINED3DSIH_LOG           */ shader_glsl_log,
4737     /* WINED3DSIH_LOGP          */ shader_glsl_log,
4738     /* WINED3DSIH_LOOP          */ shader_glsl_loop,
4739     /* WINED3DSIH_LRP           */ shader_glsl_lrp,
4740     /* WINED3DSIH_M3x2          */ shader_glsl_mnxn,
4741     /* WINED3DSIH_M3x3          */ shader_glsl_mnxn,
4742     /* WINED3DSIH_M3x4          */ shader_glsl_mnxn,
4743     /* WINED3DSIH_M4x3          */ shader_glsl_mnxn,
4744     /* WINED3DSIH_M4x4          */ shader_glsl_mnxn,
4745     /* WINED3DSIH_MAD           */ shader_glsl_mad,
4746     /* WINED3DSIH_MAX           */ shader_glsl_map2gl,
4747     /* WINED3DSIH_MIN           */ shader_glsl_map2gl,
4748     /* WINED3DSIH_MOV           */ shader_glsl_mov,
4749     /* WINED3DSIH_MOVA          */ shader_glsl_mov,
4750     /* WINED3DSIH_MUL           */ shader_glsl_arith,
4751     /* WINED3DSIH_NOP           */ NULL,
4752     /* WINED3DSIH_NRM           */ shader_glsl_map2gl,
4753     /* WINED3DSIH_PHASE         */ NULL,
4754     /* WINED3DSIH_POW           */ shader_glsl_pow,
4755     /* WINED3DSIH_RCP           */ shader_glsl_rcp,
4756     /* WINED3DSIH_REP           */ shader_glsl_rep,
4757     /* WINED3DSIH_RET           */ shader_glsl_ret,
4758     /* WINED3DSIH_RSQ           */ shader_glsl_rsq,
4759     /* WINED3DSIH_SETP          */ NULL,
4760     /* WINED3DSIH_SGE           */ shader_glsl_compare,
4761     /* WINED3DSIH_SGN           */ shader_glsl_sgn,
4762     /* WINED3DSIH_SINCOS        */ shader_glsl_sincos,
4763     /* WINED3DSIH_SLT           */ shader_glsl_compare,
4764     /* WINED3DSIH_SUB           */ shader_glsl_arith,
4765     /* WINED3DSIH_TEX           */ shader_glsl_tex,
4766     /* WINED3DSIH_TEXBEM        */ shader_glsl_texbem,
4767     /* WINED3DSIH_TEXBEML       */ shader_glsl_texbem,
4768     /* WINED3DSIH_TEXCOORD      */ shader_glsl_texcoord,
4769     /* WINED3DSIH_TEXDEPTH      */ shader_glsl_texdepth,
4770     /* WINED3DSIH_TEXDP3        */ shader_glsl_texdp3,
4771     /* WINED3DSIH_TEXDP3TEX     */ shader_glsl_texdp3tex,
4772     /* WINED3DSIH_TEXKILL       */ shader_glsl_texkill,
4773     /* WINED3DSIH_TEXLDD        */ shader_glsl_texldd,
4774     /* WINED3DSIH_TEXLDL        */ shader_glsl_texldl,
4775     /* WINED3DSIH_TEXM3x2DEPTH  */ shader_glsl_texm3x2depth,
4776     /* WINED3DSIH_TEXM3x2PAD    */ shader_glsl_texm3x2pad,
4777     /* WINED3DSIH_TEXM3x2TEX    */ shader_glsl_texm3x2tex,
4778     /* WINED3DSIH_TEXM3x3       */ shader_glsl_texm3x3,
4779     /* WINED3DSIH_TEXM3x3DIFF   */ NULL,
4780     /* WINED3DSIH_TEXM3x3PAD    */ shader_glsl_texm3x3pad,
4781     /* WINED3DSIH_TEXM3x3SPEC   */ shader_glsl_texm3x3spec,
4782     /* WINED3DSIH_TEXM3x3TEX    */ shader_glsl_texm3x3tex,
4783     /* WINED3DSIH_TEXM3x3VSPEC  */ shader_glsl_texm3x3vspec,
4784     /* WINED3DSIH_TEXREG2AR     */ shader_glsl_texreg2ar,
4785     /* WINED3DSIH_TEXREG2GB     */ shader_glsl_texreg2gb,
4786     /* WINED3DSIH_TEXREG2RGB    */ shader_glsl_texreg2rgb,
4787 };
4788
4789 static void shader_glsl_handle_instruction(const struct wined3d_shader_instruction *ins) {
4790     SHADER_HANDLER hw_fct;
4791
4792     /* Select handler */
4793     hw_fct = shader_glsl_instruction_handler_table[ins->handler_idx];
4794
4795     /* Unhandled opcode */
4796     if (!hw_fct)
4797     {
4798         FIXME("Backend can't handle opcode %#x\n", ins->handler_idx);
4799         return;
4800     }
4801     hw_fct(ins);
4802
4803     shader_glsl_add_instruction_modifiers(ins);
4804 }
4805
4806 const shader_backend_t glsl_shader_backend = {
4807     shader_glsl_handle_instruction,
4808     shader_glsl_select,
4809     shader_glsl_select_depth_blt,
4810     shader_glsl_deselect_depth_blt,
4811     shader_glsl_update_float_vertex_constants,
4812     shader_glsl_update_float_pixel_constants,
4813     shader_glsl_load_constants,
4814     shader_glsl_load_np2fixup_constants,
4815     shader_glsl_destroy,
4816     shader_glsl_alloc,
4817     shader_glsl_free,
4818     shader_glsl_dirty_const,
4819     shader_glsl_get_caps,
4820     shader_glsl_color_fixup_supported,
4821 };