wined3d: Introduce a wined3d_vertex_pipe_ops structure.
[wine] / dlls / wined3d / glsl_shader.c
1 /*
2  * GLSL pixel and vertex shader implementation
3  *
4  * Copyright 2006 Jason Green
5  * Copyright 2006-2007 Henri Verbeet
6  * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
7  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22  */
23
24 /*
25  * D3D shader asm has swizzles on source parameters, and write masks for
26  * destination parameters. GLSL uses swizzles for both. The result of this is
27  * that for example "mov dst.xw, src.zyxw" becomes "dst.xw = src.zw" in GLSL.
28  * Ie, to generate a proper GLSL source swizzle, we need to take the D3D write
29  * mask for the destination parameter into account.
30  */
31
32 #include "config.h"
33 #include "wine/port.h"
34
35 #include <limits.h>
36 #include <stdio.h>
37
38 #include "wined3d_private.h"
39
/* Debug channels referenced by this file. d3d_shader is the default channel
 * (queried below via WARN_ON/FIXME_ON/TRACE_ON(d3d_shader)); d3d_constants is
 * selected explicitly with TRACE_(d3d_constants) when constants are loaded. */
40 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
41 WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
42 WINE_DECLARE_DEBUG_CHANNEL(d3d);
43 WINE_DECLARE_DEBUG_CHANNEL(winediag);
44
/* Sampling variant flags. Each value occupies a distinct bit, so the flags
 * can be combined with bitwise OR.
 * NOTE(review): the consumers of these flags are outside this part of the
 * file; the per-flag meaning is only suggested by the names. */
45 #define WINED3D_GLSL_SAMPLE_PROJECTED   0x1
46 #define WINED3D_GLSL_SAMPLE_NPOT        0x2
47 #define WINED3D_GLSL_SAMPLE_LOD         0x4
48 #define WINED3D_GLSL_SAMPLE_GRAD        0x8
49
/* Textual GLSL form of a destination parameter, held in two fixed-size
 * character buffers.
 * NOTE(review): presumably reg_name is the register expression and mask_str
 * the write mask suffix (see the swizzle/write mask note at the top of the
 * file); the code that fills them is not visible here. */
50 struct glsl_dst_param
51 {
52     char reg_name[150];
53     char mask_str[6];
54 };
55
/* Textual GLSL form of a source parameter, held in two fixed-size character
 * buffers.
 * NOTE(review): presumably reg_name is the bare register name and param_str
 * the complete expression including swizzle/modifiers; the code that fills
 * them is not visible here. */
56 struct glsl_src_param
57 {
58     char reg_name[150];
59     char param_str[200];
60 };
61
/* Describes a sampling function by name together with a coordinate mask.
 * NOTE(review): presumably name is the GLSL built-in to call and coord_mask
 * selects the texture coordinate components it consumes; confirm against
 * the code that fills this structure. */
62 struct glsl_sample_function
63 {
64     const char *name;
65     DWORD coord_mask;
66 };
67
/* Per-level state stored on the traversal stack by walk_constant_heap() and
 * walk_constant_heap_clamped(): the next action to take for the node at
 * that stack level. */
68 enum heap_node_op
69 {
70     HEAP_NODE_TRAVERSE_LEFT,  /* Try to descend into the left child (2 * i). */
71     HEAP_NODE_TRAVERSE_RIGHT, /* Try to descend into the right child (2 * i + 1). */
72     HEAP_NODE_POP,            /* Both children handled; return to the parent. */
73 };
74
/* One node of a constant_heap. */
75 struct constant_entry
76 {
77     unsigned int idx;     /* Index of the float constant; used to index the constants and uniform location arrays. */
78     unsigned int version; /* Compared against a program's constant_version to decide whether to re-upload. */
79 };
80
/* Binary heap of constant_entry nodes stored in an array. The heap walkers
 * treat entries[1] as the root and entries[2 * i] / entries[2 * i + 1] as the
 * children of entries[i]; child indices are only followed while they are
 * smaller than size. */
81 struct constant_heap
82 {
83     struct constant_entry *entries;
84     unsigned int *positions; /* NOTE(review): presumably maps a constant index to its heap slot - confirm in update_heap_entry(). */
85     unsigned int size;
86 };
87
88 /* GLSL shader private data */
/* Reached through device->shader_priv in shader_glsl_load_constants(). */
89 struct shader_glsl_priv {
90     struct wined3d_shader_buffer shader_buffer;
91     struct wine_rb_tree program_lookup;      /* Tree of glsl_shader_prog_link entries (via program_lookup_entry). */
92     struct glsl_shader_prog_link *glsl_program; /* Program the constant loaders operate on; NULL means nothing to load. */
93     struct constant_heap vconst_heap;        /* Heap passed when loading vertex shader float constants. */
94     struct constant_heap pconst_heap;        /* Heap passed when loading pixel shader float constants. */
95     unsigned char *stack;                    /* Scratch stack handed to the constant heap walkers. */
96     GLhandleARB depth_blt_program_full[tex_type_count];
97     GLhandleARB depth_blt_program_masked[tex_type_count];
98     UINT next_constant_version;              /* Version stamped on a program after its constants were loaded. */
99
100     const struct wined3d_vertex_pipe_ops *vertex_pipe;
101     const struct fragment_pipeline *fragment_pipe; /* Compared against &glsl_fragment_pipe when no pixel shader is used. */
102     struct wine_rb_tree ffp_fragment_shaders;
103     BOOL ffp_proj_control;
104 };
105
/* Vertex shader part of a linked GLSL program. The *_location(s) members are
 * GL uniform locations consumed by shader_glsl_load_constants(). */
106 struct glsl_vs_program
107 {
108     struct list shader_entry;
109     GLhandleARB id;
110     GLenum vertex_color_clamp;
111     GLint *uniform_f_locations;             /* Float constant locations, indexed by constant index. */
112     GLint uniform_i_locations[MAX_CONST_I]; /* Integer constant locations. */
113     GLint pos_fixup_location;               /* Receives the 4-float position fixup. */
114 };
115
/* Geometry shader part of a linked GLSL program: a list link plus the GL
 * object handle. */
116 struct glsl_gs_program
117 {
118     struct list shader_entry;
119     GLhandleARB id;
120 };
121
/* Pixel shader part of a linked GLSL program. The *_location members are GL
 * uniform locations; the loaders in this file treat -1 as "uniform not
 * present" for the ones they test (bump environment, y correction, NP2
 * fixup). */
122 struct glsl_ps_program
123 {
124     struct list shader_entry;
125     GLhandleARB id;
126     GLint *uniform_f_locations;             /* Float constant locations, indexed by constant index. */
127     GLint uniform_i_locations[MAX_CONST_I]; /* Integer constant locations. */
128     GLint bumpenv_mat_location[MAX_TEXTURES];        /* Per stage 2x2 bump environment matrix. */
129     GLint bumpenv_lum_scale_location[MAX_TEXTURES];  /* Per stage bump luminance scale. */
130     GLint bumpenv_lum_offset_location[MAX_TEXTURES]; /* Per stage bump luminance offset. */
131     GLint tex_factor_location;              /* Texture factor colour (fixed function fragment path). */
132     GLint specular_enable_location;         /* Specular enable mask (fixed function fragment path). */
133     GLint ycorrection_location;             /* Y correction parameters. */
134     GLint np2_fixup_location;               /* Array of packed NP2 texcoord fixup factors. */
135     const struct ps_np2fixup_info *np2_fixup_info; /* Describes which samplers need NP2 fixup; may be NULL. */
136 };
137
138 /* Struct to maintain data about a linked GLSL program */
139 struct glsl_shader_prog_link
140 {
141     struct wine_rb_entry program_lookup_entry; /* Node in shader_glsl_priv.program_lookup. */
142     struct glsl_vs_program vs;
143     struct glsl_gs_program gs;
144     struct glsl_ps_program ps;
145     GLhandleARB programId;                     /* GL program object handle. */
146     UINT constant_version;                     /* Constant version last loaded into this program; 0 after a reset. */
147 };
148
/* Triple of vertex, geometry and pixel shader object handles.
 * NOTE(review): presumably the lookup key for program_lookup; the compare
 * function is not visible in this part of the file. */
149 struct glsl_program_key
150 {
151     GLhandleARB vs_id;
152     GLhandleARB gs_id;
153     GLhandleARB ps_id;
154 };
155
/* Bundle of pointers to the current vertex/pixel shader compile arguments
 * and the NP2 fixup info.
 * NOTE(review): presumably passed as backend context while a shader is being
 * generated; the users are not visible in this part of the file. */
156 struct shader_glsl_ctx_priv {
157     const struct vs_compile_args    *cur_vs_args;
158     const struct ps_compile_args    *cur_ps_args;
159     struct ps_np2fixup_info         *cur_np2fixup_info;
160 };
161
/* One compiled pixel shader variant: the compile arguments, the NP2 fixup
 * info and the GL shader object handle.
 * NOTE(review): presumably args is what variants are matched on - confirm. */
162 struct glsl_ps_compiled_shader
163 {
164     struct ps_compile_args          args;
165     struct ps_np2fixup_info         np2fixup;
166     GLhandleARB                     prgId;
167 };
168
/* One compiled vertex shader variant: the compile arguments and the GL
 * shader object handle. */
169 struct glsl_vs_compiled_shader
170 {
171     struct vs_compile_args          args;
172     GLhandleARB                     prgId;
173 };
174
/* One compiled geometry shader: only the GL shader object handle. */
175 struct glsl_gs_compiled_shader
176 {
177     GLhandleARB id;
178 };
179
/* Array of compiled variants for one shader. The union member to use depends
 * on the shader type.
 * NOTE(review): presumably num_gl_shaders is the number of used elements and
 * shader_array_size the allocated capacity - confirm against the code that
 * grows the array. */
180 struct glsl_shader_private
181 {
182     union
183     {
184         struct glsl_vs_compiled_shader *vs;
185         struct glsl_gs_compiled_shader *gs;
186         struct glsl_ps_compiled_shader *ps;
187     } gl_shaders;
188     UINT num_gl_shaders, shader_array_size;
189 };
190
/* A fixed function fragment shader: its ffp_frag_desc entry, the GL shader
 * object handle and a list head named linked_programs.
 * NOTE(review): presumably stored in shader_glsl_priv.ffp_fragment_shaders,
 * with linked_programs holding the programs this shader is part of -
 * confirm. */
191 struct glsl_ffp_fragment_shader
192 {
193     struct ffp_frag_desc entry;
194     GLhandleARB id;
195     struct list linked_programs;
196 };
197
198 static const char *debug_gl_shader_type(GLenum type)
199 {
200     switch (type)
201     {
202 #define WINED3D_TO_STR(u) case u: return #u
203         WINED3D_TO_STR(GL_VERTEX_SHADER_ARB);
204         WINED3D_TO_STR(GL_GEOMETRY_SHADER_ARB);
205         WINED3D_TO_STR(GL_FRAGMENT_SHADER_ARB);
206 #undef WINED3D_TO_STR
207         default:
208             return wine_dbg_sprintf("UNKNOWN(%#x)", type);
209     }
210 }
211
212 static const char *shader_glsl_get_prefix(enum wined3d_shader_type type)
213 {
214     switch (type)
215     {
216         case WINED3D_SHADER_TYPE_VERTEX:
217             return "vs";
218
219         case WINED3D_SHADER_TYPE_GEOMETRY:
220             return "gs";
221
222         case WINED3D_SHADER_TYPE_PIXEL:
223             return "ps";
224
225         default:
226             FIXME("Unhandled shader type %#x.\n", type);
227             return "unknown";
228     }
229 }
230
/* Split the next line off a multi-line string.
 *
 * *ptr is the current read position. The returned pointer is the start of
 * the line; its terminating '\n', if any, is overwritten with '\0' (so the
 * input buffer is modified) and *ptr is advanced past it. A final line
 * without a newline is returned as is, leaving *ptr on the string
 * terminator. Returns NULL once *ptr points at an empty string. */
static char *get_info_log_line(char **ptr)
{
    char *line = *ptr;
    char *newline = strchr(line, '\n');

    if (newline)
    {
        *newline = '\0';
        *ptr = newline + 1;
        return line;
    }

    /* No newline left: either the input is exhausted or this is the last,
     * unterminated line. */
    if (*line == '\0')
        return NULL;

    *ptr = line + strlen(line);
    return line;
}
249
250 /* Context activation is done by the caller. */
251 static void print_glsl_info_log(const struct wined3d_gl_info *gl_info, GLhandleARB obj)
252 {
253     int infologLength = 0;
254     char *infoLog;
255
256     if (!WARN_ON(d3d_shader) && !FIXME_ON(d3d_shader))
257         return;
258
259     GL_EXTCALL(glGetObjectParameterivARB(obj,
260                GL_OBJECT_INFO_LOG_LENGTH_ARB,
261                &infologLength));
262
263     /* A size of 1 is just a null-terminated string, so the log should be bigger than
264      * that if there are errors. */
265     if (infologLength > 1)
266     {
267         char *ptr, *line;
268
269         infoLog = HeapAlloc(GetProcessHeap(), 0, infologLength);
270         /* The info log is supposed to be zero-terminated, but at least some
271          * versions of fglrx don't terminate the string properly. The reported
272          * length does include the terminator, so explicitly set it to zero
273          * here. */
274         infoLog[infologLength - 1] = 0;
275         GL_EXTCALL(glGetInfoLogARB(obj, infologLength, NULL, infoLog));
276
277         ptr = infoLog;
278         if (gl_info->quirks & WINED3D_QUIRK_INFO_LOG_SPAM)
279         {
280             WARN("Info log received from GLSL shader #%u:\n", obj);
281             while ((line = get_info_log_line(&ptr))) WARN("    %s\n", line);
282         }
283         else
284         {
285             FIXME("Info log received from GLSL shader #%u:\n", obj);
286             while ((line = get_info_log_line(&ptr))) FIXME("    %s\n", line);
287         }
288         HeapFree(GetProcessHeap(), 0, infoLog);
289     }
290 }
291
/* Set `src` as the single source string of the GL shader object `shader`,
 * compile it, and print the resulting info log (if logging is enabled, see
 * print_glsl_info_log()). The compile status itself is not queried here. */
292 /* Context activation is done by the caller. */
293 static void shader_glsl_compile(const struct wined3d_gl_info *gl_info, GLhandleARB shader, const char *src)
294 {
295     TRACE("Compiling shader object %u.\n", shader);
    /* A NULL length array means the source string is null-terminated. */
296     GL_EXTCALL(glShaderSourceARB(shader, 1, &src, NULL));
297     checkGLcall("glShaderSourceARB");
298     GL_EXTCALL(glCompileShaderARB(shader));
299     checkGLcall("glCompileShaderARB");
300     print_glsl_info_log(gl_info, shader);
301 }
302
303 /* Context activation is done by the caller. */
304 static void shader_glsl_dump_program_source(const struct wined3d_gl_info *gl_info, GLhandleARB program)
305 {
306     GLint i, object_count, source_size = -1;
307     GLhandleARB *objects;
308     char *source = NULL;
309
310     GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_ATTACHED_OBJECTS_ARB, &object_count));
311     objects = HeapAlloc(GetProcessHeap(), 0, object_count * sizeof(*objects));
312     if (!objects)
313     {
314         ERR("Failed to allocate object array memory.\n");
315         return;
316     }
317
318     GL_EXTCALL(glGetAttachedObjectsARB(program, object_count, NULL, objects));
319     for (i = 0; i < object_count; ++i)
320     {
321         char *ptr, *line;
322         GLint tmp;
323
324         GL_EXTCALL(glGetObjectParameterivARB(objects[i], GL_OBJECT_SHADER_SOURCE_LENGTH_ARB, &tmp));
325
326         if (source_size < tmp)
327         {
328             HeapFree(GetProcessHeap(), 0, source);
329
330             source = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, tmp);
331             if (!source)
332             {
333                 ERR("Failed to allocate %d bytes for shader source.\n", tmp);
334                 HeapFree(GetProcessHeap(), 0, objects);
335                 return;
336             }
337             source_size = tmp;
338         }
339
340         FIXME("Object %u:\n", objects[i]);
341         GL_EXTCALL(glGetObjectParameterivARB(objects[i], GL_OBJECT_SUBTYPE_ARB, &tmp));
342         FIXME("    GL_OBJECT_SUBTYPE_ARB: %s.\n", debug_gl_shader_type(tmp));
343         GL_EXTCALL(glGetObjectParameterivARB(objects[i], GL_OBJECT_COMPILE_STATUS_ARB, &tmp));
344         FIXME("    GL_OBJECT_COMPILE_STATUS_ARB: %d.\n", tmp);
345         FIXME("\n");
346
347         ptr = source;
348         GL_EXTCALL(glGetShaderSourceARB(objects[i], source_size, NULL, source));
349         while ((line = get_info_log_line(&ptr))) FIXME("    %s\n", line);
350         FIXME("\n");
351     }
352
353     HeapFree(GetProcessHeap(), 0, source);
354     HeapFree(GetProcessHeap(), 0, objects);
355 }
356
357 /* Context activation is done by the caller. */
358 static void shader_glsl_validate_link(const struct wined3d_gl_info *gl_info, GLhandleARB program)
359 {
360     GLint tmp;
361
362     if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader)) return;
363
364     GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_TYPE_ARB, &tmp));
365     if (tmp == GL_PROGRAM_OBJECT_ARB)
366     {
367         GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_LINK_STATUS_ARB, &tmp));
368         if (!tmp)
369         {
370             FIXME("Program %u link status invalid.\n", program);
371             shader_glsl_dump_program_source(gl_info, program);
372         }
373     }
374
375     print_glsl_info_log(gl_info, program);
376 }
377
378 /* Context activation is done by the caller. */
379 static void shader_glsl_load_psamplers(const struct wined3d_gl_info *gl_info,
380         const DWORD *tex_unit_map, GLhandleARB programId)
381 {
382     GLint name_loc;
383     char sampler_name[20];
384     unsigned int i;
385
386     for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i)
387     {
388         snprintf(sampler_name, sizeof(sampler_name), "ps_sampler%u", i);
389         name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
390         if (name_loc != -1) {
391             DWORD mapped_unit = tex_unit_map[i];
392             if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.fragment_samplers)
393             {
394                 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
395                 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
396                 checkGLcall("glUniform1iARB");
397             } else {
398                 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
399             }
400         }
401     }
402 }
403
404 /* Context activation is done by the caller. */
405 static void shader_glsl_load_vsamplers(const struct wined3d_gl_info *gl_info,
406         const DWORD *tex_unit_map, GLhandleARB programId)
407 {
408     GLint name_loc;
409     char sampler_name[20];
410     unsigned int i;
411
412     for (i = 0; i < MAX_VERTEX_SAMPLERS; ++i)
413     {
414         snprintf(sampler_name, sizeof(sampler_name), "vs_sampler%u", i);
415         name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
416         if (name_loc != -1) {
417             DWORD mapped_unit = tex_unit_map[MAX_FRAGMENT_SAMPLERS + i];
418             if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.combined_samplers)
419             {
420                 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
421                 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
422                 checkGLcall("glUniform1iARB");
423             } else {
424                 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
425             }
426         }
427     }
428 }
429
/* Upload the float constants whose heap entry carries a version greater than
 * `version`.
 *
 * The heap is walked depth first without recursion, starting at the root
 * (index 1). A child is only entered when its version is greater than
 * `version`; otherwise the child and everything below it is skipped.
 * `stack` holds one heap_node_op per level of the current path.
 * NOTE(review): skipping whole subtrees is only correct if a parent's
 * version is never smaller than its children's, and `stack` must be large
 * enough for the heap depth - neither is established in this function. */
430 /* Context activation is done by the caller. */
431 static inline void walk_constant_heap(const struct wined3d_gl_info *gl_info, const float *constants,
432         const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
433 {
434     int stack_idx = 0;
435     unsigned int heap_idx = 1;
436     unsigned int idx;
437
    /* Root not newer than `version`: nothing is uploaded. */
438     if (heap->entries[heap_idx].version <= version) return;
439
    /* Upload the root constant (4 floats), unless it has no uniform location. */
440     idx = heap->entries[heap_idx].idx;
441     if (constant_locations[idx] != -1) GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
442     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
443
    /* stack_idx drops below 0 when the root itself is popped. */
444     while (stack_idx >= 0)
445     {
446         /* Note that we fall through to the next case statement. */
447         switch(stack[stack_idx])
448         {
449             case HEAP_NODE_TRAVERSE_LEFT:
450             {
451                 unsigned int left_idx = heap_idx << 1;
452                 if (left_idx < heap->size && heap->entries[left_idx].version > version)
453                 {
454                     heap_idx = left_idx;
455                     idx = heap->entries[heap_idx].idx;
456                     if (constant_locations[idx] != -1)
457                         GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
458
                    /* Remember to try the right sibling when we come back to the parent. */
459                     stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
460                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
461                     break;
462                 }
463             }
464
465             case HEAP_NODE_TRAVERSE_RIGHT:
466             {
467                 unsigned int right_idx = (heap_idx << 1) + 1;
468                 if (right_idx < heap->size && heap->entries[right_idx].version > version)
469                 {
470                     heap_idx = right_idx;
471                     idx = heap->entries[heap_idx].idx;
472                     if (constant_locations[idx] != -1)
473                         GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
474
                    /* Both children of the parent are now handled; pop it on return. */
475                     stack[stack_idx++] = HEAP_NODE_POP;
476                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
477                     break;
478                 }
479             }
480
481             case HEAP_NODE_POP:
                /* Go back to the parent node and its stack level. */
482                 heap_idx >>= 1;
483                 --stack_idx;
484                 break;
485         }
486     }
487     checkGLcall("walk_constant_heap()");
488 }
489
490 /* Context activation is done by the caller. */
491 static inline void apply_clamped_constant(const struct wined3d_gl_info *gl_info, GLint location, const GLfloat *data)
492 {
493     GLfloat clamped_constant[4];
494
495     if (location == -1) return;
496
497     clamped_constant[0] = data[0] < -1.0f ? -1.0f : data[0] > 1.0f ? 1.0f : data[0];
498     clamped_constant[1] = data[1] < -1.0f ? -1.0f : data[1] > 1.0f ? 1.0f : data[1];
499     clamped_constant[2] = data[2] < -1.0f ? -1.0f : data[2] > 1.0f ? 1.0f : data[2];
500     clamped_constant[3] = data[3] < -1.0f ? -1.0f : data[3] > 1.0f ? 1.0f : data[3];
501
502     GL_EXTCALL(glUniform4fvARB(location, 1, clamped_constant));
503 }
504
/* Same traversal as walk_constant_heap(), but every constant is uploaded
 * through apply_clamped_constant(), which clamps the four components to
 * [-1.0, 1.0] and ignores a location of -1.
 *
 * A child is only entered when its version is greater than `version`;
 * `stack` holds one heap_node_op per level of the current path.
 * NOTE(review): as in walk_constant_heap(), the heap ordering and the
 * required size of `stack` are assumptions not established here. */
505 /* Context activation is done by the caller. */
506 static inline void walk_constant_heap_clamped(const struct wined3d_gl_info *gl_info, const float *constants,
507         const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
508 {
509     int stack_idx = 0;
510     unsigned int heap_idx = 1;
511     unsigned int idx;
512
    /* Root not newer than `version`: nothing is uploaded. */
513     if (heap->entries[heap_idx].version <= version) return;
514
515     idx = heap->entries[heap_idx].idx;
516     apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
517     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
518
    /* stack_idx drops below 0 when the root itself is popped. */
519     while (stack_idx >= 0)
520     {
521         /* Note that we fall through to the next case statement. */
522         switch(stack[stack_idx])
523         {
524             case HEAP_NODE_TRAVERSE_LEFT:
525             {
526                 unsigned int left_idx = heap_idx << 1;
527                 if (left_idx < heap->size && heap->entries[left_idx].version > version)
528                 {
529                     heap_idx = left_idx;
530                     idx = heap->entries[heap_idx].idx;
531                     apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
532
                    /* Remember to try the right sibling when we come back to the parent. */
533                     stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
534                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
535                     break;
536                 }
537             }
538
539             case HEAP_NODE_TRAVERSE_RIGHT:
540             {
541                 unsigned int right_idx = (heap_idx << 1) + 1;
542                 if (right_idx < heap->size && heap->entries[right_idx].version > version)
543                 {
544                     heap_idx = right_idx;
545                     idx = heap->entries[heap_idx].idx;
546                     apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
547
                    /* Both children of the parent are now handled; pop it on return. */
548                     stack[stack_idx++] = HEAP_NODE_POP;
549                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
550                     break;
551                 }
552             }
553
554             case HEAP_NODE_POP:
                /* Go back to the parent node and its stack level. */
555                 heap_idx >>= 1;
556                 --stack_idx;
557                 break;
558         }
559     }
560     checkGLcall("walk_constant_heap_clamped()");
561 }
562
563 /* Context activation is done by the caller. */
564 static void shader_glsl_load_constantsF(const struct wined3d_shader *shader, const struct wined3d_gl_info *gl_info,
565         const float *constants, const GLint *constant_locations, const struct constant_heap *heap,
566         unsigned char *stack, UINT version)
567 {
568     const struct wined3d_shader_lconst *lconst;
569
570     /* 1.X pshaders have the constants clamped to [-1;1] implicitly. */
571     if (shader->reg_maps.shader_version.major == 1
572             && shader->reg_maps.shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
573         walk_constant_heap_clamped(gl_info, constants, constant_locations, heap, stack, version);
574     else
575         walk_constant_heap(gl_info, constants, constant_locations, heap, stack, version);
576
577     if (!shader->load_local_constsF)
578     {
579         TRACE("No need to load local float constants for this shader\n");
580         return;
581     }
582
583     /* Immediate constants are clamped to [-1;1] at shader creation time if needed */
584     LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
585     {
586         GL_EXTCALL(glUniform4fvARB(constant_locations[lconst->idx], 1, (const GLfloat *)lconst->value));
587     }
588     checkGLcall("glUniform4fvARB()");
589 }
590
591 /* Context activation is done by the caller. */
592 static void shader_glsl_load_constantsI(const struct wined3d_shader *shader, const struct wined3d_gl_info *gl_info,
593         const GLint locations[MAX_CONST_I], const int *constants, WORD constants_set)
594 {
595     unsigned int i;
596     struct list* ptr;
597
598     for (i = 0; constants_set; constants_set >>= 1, ++i)
599     {
600         if (!(constants_set & 1)) continue;
601
602         TRACE_(d3d_constants)("Loading constants %u: %i, %i, %i, %i\n",
603                 i, constants[i*4], constants[i*4+1], constants[i*4+2], constants[i*4+3]);
604
605         /* We found this uniform name in the program - go ahead and send the data */
606         GL_EXTCALL(glUniform4ivARB(locations[i], 1, &constants[i*4]));
607         checkGLcall("glUniform4ivARB");
608     }
609
610     /* Load immediate constants */
611     ptr = list_head(&shader->constantsI);
612     while (ptr)
613     {
614         const struct wined3d_shader_lconst *lconst = LIST_ENTRY(ptr, const struct wined3d_shader_lconst, entry);
615         unsigned int idx = lconst->idx;
616         const GLint *values = (const GLint *)lconst->value;
617
618         TRACE_(d3d_constants)("Loading local constants %i: %i, %i, %i, %i\n", idx,
619             values[0], values[1], values[2], values[3]);
620
621         /* We found this uniform name in the program - go ahead and send the data */
622         GL_EXTCALL(glUniform4ivARB(locations[idx], 1, values));
623         checkGLcall("glUniform4ivARB");
624         ptr = list_next(&shader->constantsI, ptr);
625     }
626 }
627
628 /* Context activation is done by the caller. */
629 static void shader_glsl_load_constantsB(const struct wined3d_shader *shader, const struct wined3d_gl_info *gl_info,
630         GLhandleARB programId, const BOOL *constants, WORD constants_set)
631 {
632     GLint tmp_loc;
633     unsigned int i;
634     char tmp_name[10];
635     const char *prefix;
636     struct list* ptr;
637
638     prefix = shader_glsl_get_prefix(shader->reg_maps.shader_version.type);
639
640     /* TODO: Benchmark and see if it would be beneficial to store the
641      * locations of the constants to avoid looking up each time */
642     for (i = 0; constants_set; constants_set >>= 1, ++i)
643     {
644         if (!(constants_set & 1)) continue;
645
646         TRACE_(d3d_constants)("Loading constants %i: %i;\n", i, constants[i]);
647
648         /* TODO: Benchmark and see if it would be beneficial to store the
649          * locations of the constants to avoid looking up each time */
650         snprintf(tmp_name, sizeof(tmp_name), "%s_b[%i]", prefix, i);
651         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
652         GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, &constants[i]));
653     }
654
655     /* Load immediate constants */
656     ptr = list_head(&shader->constantsB);
657     while (ptr)
658     {
659         const struct wined3d_shader_lconst *lconst = LIST_ENTRY(ptr, const struct wined3d_shader_lconst, entry);
660         unsigned int idx = lconst->idx;
661         const GLint *values = (const GLint *)lconst->value;
662
663         TRACE_(d3d_constants)("Loading local constants %i: %i\n", idx, values[0]);
664
665         snprintf(tmp_name, sizeof(tmp_name), "%s_b[%i]", prefix, idx);
666         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
667         GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, values));
668         ptr = list_next(&shader->constantsB, ptr);
669     }
670
671     checkGLcall("shader_glsl_load_constantsB()");
672 }
673
674 static void reset_program_constant_version(struct wine_rb_entry *entry, void *context)
675 {
676     WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry)->constant_version = 0;
677 }
678
679 /* Context activation is done by the caller (state handler). */
680 static void shader_glsl_load_np2fixup_constants(void *shader_priv,
681         const struct wined3d_gl_info *gl_info, const struct wined3d_state *state)
682 {
683     struct shader_glsl_priv *glsl_priv = shader_priv;
684     const struct glsl_shader_prog_link *prog = glsl_priv->glsl_program;
685
686     /* No GLSL program set - nothing to do. */
687     if (!prog) return;
688
689     /* NP2 texcoord fixup is (currently) only done for pixelshaders. */
690     if (!use_ps(state)) return;
691
692     if (prog->ps.np2_fixup_info && prog->ps.np2_fixup_location != -1)
693     {
694         UINT i;
695         UINT fixup = prog->ps.np2_fixup_info->active;
696         GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS];
697
698         for (i = 0; fixup; fixup >>= 1, ++i)
699         {
700             const struct wined3d_texture *tex = state->textures[i];
701             const unsigned char idx = prog->ps.np2_fixup_info->idx[i];
702             GLfloat *tex_dim = &np2fixup_constants[(idx >> 1) * 4];
703
704             if (!tex)
705             {
706                 ERR("Nonexistent texture is flagged for NP2 texcoord fixup.\n");
707                 continue;
708             }
709
710             if (idx % 2)
711             {
712                 tex_dim[2] = tex->pow2_matrix[0];
713                 tex_dim[3] = tex->pow2_matrix[5];
714             }
715             else
716             {
717                 tex_dim[0] = tex->pow2_matrix[0];
718                 tex_dim[1] = tex->pow2_matrix[5];
719             }
720         }
721
722         GL_EXTCALL(glUniform4fvARB(prog->ps.np2_fixup_location,
723                 prog->ps.np2_fixup_info->num_consts, np2fixup_constants));
724     }
725 }
726
727 /* Context activation is done by the caller (state handler). */
728 static void shader_glsl_load_constants(const struct wined3d_context *context,
729         BOOL usePixelShader, BOOL useVertexShader)
730 {
731     const struct wined3d_gl_info *gl_info = context->gl_info;
732     struct wined3d_device *device = context->swapchain->device;
733     struct wined3d_stateblock *stateBlock = device->stateBlock;
734     const struct wined3d_state *state = &stateBlock->state;
735     struct shader_glsl_priv *priv = device->shader_priv;
736     float position_fixup[4];
737
738     GLhandleARB programId;
739     struct glsl_shader_prog_link *prog = priv->glsl_program;
740     UINT constant_version;
741     int i;
742
743     if (!prog) {
744         /* No GLSL program set - nothing to do. */
745         return;
746     }
747     programId = prog->programId;
748     constant_version = prog->constant_version;
749
750     if (useVertexShader)
751     {
752         const struct wined3d_shader *vshader = state->vertex_shader;
753
754         /* Load DirectX 9 float constants/uniforms for vertex shader */
755         shader_glsl_load_constantsF(vshader, gl_info, state->vs_consts_f,
756                 prog->vs.uniform_f_locations, &priv->vconst_heap, priv->stack, constant_version);
757
758         /* Load DirectX 9 integer constants/uniforms for vertex shader */
759         shader_glsl_load_constantsI(vshader, gl_info, prog->vs.uniform_i_locations, state->vs_consts_i,
760                 stateBlock->changed.vertexShaderConstantsI & vshader->reg_maps.integer_constants);
761
762         /* Load DirectX 9 boolean constants/uniforms for vertex shader */
763         shader_glsl_load_constantsB(vshader, gl_info, programId, state->vs_consts_b,
764                 stateBlock->changed.vertexShaderConstantsB & vshader->reg_maps.boolean_constants);
765
766         /* Upload the position fixup params */
767         shader_get_position_fixup(context, state, position_fixup);
768         GL_EXTCALL(glUniform4fvARB(prog->vs.pos_fixup_location, 1, position_fixup));
769         checkGLcall("glUniform4fvARB");
770     }
771
772     if (usePixelShader)
773     {
774         const struct wined3d_shader *pshader = state->pixel_shader;
775
776         /* Load DirectX 9 float constants/uniforms for pixel shader */
777         shader_glsl_load_constantsF(pshader, gl_info, state->ps_consts_f,
778                 prog->ps.uniform_f_locations, &priv->pconst_heap, priv->stack, constant_version);
779
780         /* Load DirectX 9 integer constants/uniforms for pixel shader */
781         shader_glsl_load_constantsI(pshader, gl_info, prog->ps.uniform_i_locations, state->ps_consts_i,
782                 stateBlock->changed.pixelShaderConstantsI & pshader->reg_maps.integer_constants);
783
784         /* Load DirectX 9 boolean constants/uniforms for pixel shader */
785         shader_glsl_load_constantsB(pshader, gl_info, programId, state->ps_consts_b,
786                 stateBlock->changed.pixelShaderConstantsB & pshader->reg_maps.boolean_constants);
787
788         /* Upload the environment bump map matrix if needed. The needsbumpmat
789          * member specifies the texture stage to load the matrix from. It
790          * can't be 0 for a valid texbem instruction. */
791         for (i = 0; i < MAX_TEXTURES; ++i)
792         {
793             const float *data;
794
795             if (prog->ps.bumpenv_mat_location[i] == -1)
796                 continue;
797
798             data = (const float *)&state->texture_states[i][WINED3D_TSS_BUMPENV_MAT00];
799             GL_EXTCALL(glUniformMatrix2fvARB(prog->ps.bumpenv_mat_location[i], 1, 0, data));
800             checkGLcall("glUniformMatrix2fvARB");
801
802             /* texbeml needs the luminance scale and offset too. If texbeml
803              * is used, needsbumpmat is set too, so we can check that in the
804              * needsbumpmat check. */
805             if (prog->ps.bumpenv_lum_scale_location[i] != -1)
806             {
807                 const GLfloat *scale = (const GLfloat *)&state->texture_states[i][WINED3D_TSS_BUMPENV_LSCALE];
808                 const GLfloat *offset = (const GLfloat *)&state->texture_states[i][WINED3D_TSS_BUMPENV_LOFFSET];
809
810                 GL_EXTCALL(glUniform1fvARB(prog->ps.bumpenv_lum_scale_location[i], 1, scale));
811                 checkGLcall("glUniform1fvARB");
812                 GL_EXTCALL(glUniform1fvARB(prog->ps.bumpenv_lum_offset_location[i], 1, offset));
813                 checkGLcall("glUniform1fvARB");
814             }
815         }
816
817         if (prog->ps.ycorrection_location != -1)
818         {
819             float correction_params[4];
820
821             if (context->render_offscreen)
822             {
823                 correction_params[0] = 0.0f;
824                 correction_params[1] = 1.0f;
825             } else {
826                 /* position is window relative, not viewport relative */
827                 correction_params[0] = (float) context->current_rt->resource.height;
828                 correction_params[1] = -1.0f;
829             }
830             GL_EXTCALL(glUniform4fvARB(prog->ps.ycorrection_location, 1, correction_params));
831         }
832     }
833     else if (priv->fragment_pipe == &glsl_fragment_pipe)
834     {
835         float col[4];
836
837         for (i = 0; i < MAX_TEXTURES; ++i)
838         {
839             GL_EXTCALL(glUniformMatrix2fvARB(prog->ps.bumpenv_mat_location[i], 1, 0,
840                         (const float *)&state->texture_states[i][WINED3D_TSS_BUMPENV_MAT00]));
841             GL_EXTCALL(glUniform1fARB(prog->ps.bumpenv_lum_scale_location[i],
842                         *(const float *)&state->texture_states[i][WINED3D_TSS_BUMPENV_LSCALE]));
843             GL_EXTCALL(glUniform1fARB(prog->ps.bumpenv_lum_offset_location[i],
844                         *(const float *)&state->texture_states[i][WINED3D_TSS_BUMPENV_LOFFSET]));
845         }
846
847         D3DCOLORTOGLFLOAT4(state->render_states[WINED3D_RS_TEXTUREFACTOR], col);
848         GL_EXTCALL(glUniform4fARB(prog->ps.tex_factor_location, col[0], col[1], col[2], col[3]));
849
850         if (state->render_states[WINED3D_RS_SPECULARENABLE])
851             GL_EXTCALL(glUniform4fARB(prog->ps.specular_enable_location, 1.0f, 1.0f, 1.0f, 0.0f));
852         else
853             GL_EXTCALL(glUniform4fARB(prog->ps.specular_enable_location, 0.0f, 0.0f, 0.0f, 0.0f));
854
855         checkGLcall("fixed function uniforms");
856     }
857
858     if (priv->next_constant_version == UINT_MAX)
859     {
860         TRACE("Max constant version reached, resetting to 0.\n");
861         wine_rb_for_each_entry(&priv->program_lookup, reset_program_constant_version, NULL);
862         priv->next_constant_version = 1;
863     }
864     else
865     {
866         prog->constant_version = priv->next_constant_version++;
867     }
868 }
869
870 static void update_heap_entry(const struct constant_heap *heap, unsigned int idx,
871         unsigned int heap_idx, DWORD new_version)
872 {
873     struct constant_entry *entries = heap->entries;
874     unsigned int *positions = heap->positions;
875     unsigned int parent_idx;
876
877     while (heap_idx > 1)
878     {
879         parent_idx = heap_idx >> 1;
880
881         if (new_version <= entries[parent_idx].version) break;
882
883         entries[heap_idx] = entries[parent_idx];
884         positions[entries[parent_idx].idx] = heap_idx;
885         heap_idx = parent_idx;
886     }
887
888     entries[heap_idx].version = new_version;
889     entries[heap_idx].idx = idx;
890     positions[idx] = heap_idx;
891 }
892
893 static void shader_glsl_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count)
894 {
895     struct shader_glsl_priv *priv = device->shader_priv;
896     struct constant_heap *heap = &priv->vconst_heap;
897     UINT i;
898
899     for (i = start; i < count + start; ++i)
900     {
901         if (!device->stateBlock->changed.vertexShaderConstantsF[i])
902             update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
903         else
904             update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
905     }
906 }
907
908 static void shader_glsl_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count)
909 {
910     struct shader_glsl_priv *priv = device->shader_priv;
911     struct constant_heap *heap = &priv->pconst_heap;
912     UINT i;
913
914     for (i = start; i < count + start; ++i)
915     {
916         if (!device->stateBlock->changed.pixelShaderConstantsF[i])
917             update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
918         else
919             update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
920     }
921 }
922
923 static unsigned int vec4_varyings(DWORD shader_major, const struct wined3d_gl_info *gl_info)
924 {
925     unsigned int ret = gl_info->limits.glsl_varyings / 4;
926     /* 4.0 shaders do not write clip coords because d3d10 does not support user clipplanes */
927     if(shader_major > 3) return ret;
928
929     /* 3.0 shaders may need an extra varying for the clip coord on some cards(mostly dx10 ones) */
930     if (gl_info->quirks & WINED3D_QUIRK_GLSL_CLIP_VARYING) ret -= 1;
931     return ret;
932 }
933
934 /** Generate the variable & register declarations for the GLSL output target */
935 static void shader_generate_glsl_declarations(const struct wined3d_context *context,
936         struct wined3d_shader_buffer *buffer, const struct wined3d_shader *shader,
937         const struct wined3d_shader_reg_maps *reg_maps, const struct shader_glsl_ctx_priv *ctx_priv)
938 {
939     const struct wined3d_shader_version *version = &reg_maps->shader_version;
940     const struct wined3d_state *state = &shader->device->stateBlock->state;
941     const struct ps_compile_args *ps_args = ctx_priv->cur_ps_args;
942     const struct wined3d_gl_info *gl_info = context->gl_info;
943     const struct wined3d_fb_state *fb = &shader->device->fb;
944     unsigned int i, extra_constants_needed = 0;
945     const struct wined3d_shader_lconst *lconst;
946     const char *prefix;
947     DWORD map;
948
949     prefix = shader_glsl_get_prefix(version->type);
950
951     /* Prototype the subroutines */
952     for (i = 0, map = reg_maps->labels; map; map >>= 1, ++i)
953     {
954         if (map & 1) shader_addline(buffer, "void subroutine%u();\n", i);
955     }
956
957     /* Declare the constants (aka uniforms) */
958     if (shader->limits.constant_float > 0)
959     {
960         unsigned max_constantsF;
961
962         /* Unless the shader uses indirect addressing, always declare the
963          * maximum array size and ignore that we need some uniforms privately.
964          * E.g. if GL supports 256 uniforms, and we need 2 for the pos fixup
965          * and immediate values, still declare VC[256]. If the shader needs
966          * more uniforms than we have it won't work in any case. If it uses
967          * less, the compiler will figure out which uniforms are really used
968          * and strip them out. This allows a shader to use c255 on a dx9 card,
969          * as long as it doesn't also use all the other constants.
970          *
971          * If the shader uses indirect addressing the compiler must assume
972          * that all declared uniforms are used. In this case, declare only the
973          * amount that we're assured to have.
974          *
975          * Thus we run into problems in these two cases:
976          * 1) The shader really uses more uniforms than supported.
977          * 2) The shader uses indirect addressing, less constants than
978          *    supported, but uses a constant index > #supported consts. */
979         if (version->type == WINED3D_SHADER_TYPE_PIXEL)
980         {
981             /* No indirect addressing here. */
982             max_constantsF = gl_info->limits.glsl_ps_float_constants;
983         }
984         else
985         {
986             if (reg_maps->usesrelconstF)
987             {
988                 /* Subtract the other potential uniforms from the max
989                  * available (bools, ints, and 1 row of projection matrix).
990                  * Subtract another uniform for immediate values, which have
991                  * to be loaded via uniform by the driver as well. The shader
992                  * code only uses 0.5, 2.0, 1.0, 128 and -128 in vertex
993                  * shader code, so one vec4 should be enough. (Unfortunately
994                  * the Nvidia driver doesn't store 128 and -128 in one float).
995                  *
996                  * Writing gl_ClipVertex requires one uniform for each
997                  * clipplane as well. */
998                 max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
999                 if(ctx_priv->cur_vs_args->clip_enabled)
1000                 {
1001                     max_constantsF -= gl_info->limits.clipplanes;
1002                 }
1003                 max_constantsF -= count_bits(reg_maps->integer_constants);
1004                 /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
1005                  * so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
1006                  * for now take this into account when calculating the number of available constants
1007                  */
1008                 max_constantsF -= count_bits(reg_maps->boolean_constants);
1009                 /* Set by driver quirks in directx.c */
1010                 max_constantsF -= gl_info->reserved_glsl_constants;
1011
1012                 if (max_constantsF < shader->limits.constant_float)
1013                 {
1014                     static unsigned int once;
1015
1016                     if (!once++)
1017                         ERR_(winediag)("The hardware does not support enough uniform components to run this shader,"
1018                                 " it may not render correctly.\n");
1019                     else
1020                         WARN("The hardware does not support enough uniform components to run this shader.\n");
1021                 }
1022             }
1023             else
1024             {
1025                 max_constantsF = gl_info->limits.glsl_vs_float_constants;
1026             }
1027         }
1028         max_constantsF = min(shader->limits.constant_float, max_constantsF);
1029         shader_addline(buffer, "uniform vec4 %s_c[%u];\n", prefix, max_constantsF);
1030     }
1031
1032     /* Always declare the full set of constants, the compiler can remove the
1033      * unused ones because d3d doesn't (yet) support indirect int and bool
1034      * constant addressing. This avoids problems if the app uses e.g. i0 and i9. */
1035     if (shader->limits.constant_int > 0 && reg_maps->integer_constants)
1036         shader_addline(buffer, "uniform ivec4 %s_i[%u];\n", prefix, shader->limits.constant_int);
1037
1038     if (shader->limits.constant_bool > 0 && reg_maps->boolean_constants)
1039         shader_addline(buffer, "uniform bool %s_b[%u];\n", prefix, shader->limits.constant_bool);
1040
1041     for (i = 0; i < WINED3D_MAX_CBS; ++i)
1042     {
1043         if (reg_maps->cb_sizes[i])
1044             shader_addline(buffer, "uniform vec4 %s_cb%u[%u];\n", prefix, i, reg_maps->cb_sizes[i]);
1045     }
1046
1047     /* Declare texture samplers */
1048     for (i = 0; i < shader->limits.sampler; ++i)
1049     {
1050         if (reg_maps->sampler_type[i])
1051         {
1052             BOOL shadow_sampler = version->type == WINED3D_SHADER_TYPE_PIXEL && (ps_args->shadow & (1 << i));
1053             BOOL tex_rect;
1054
1055             switch (reg_maps->sampler_type[i])
1056             {
1057                 case WINED3DSTT_1D:
1058                     if (shadow_sampler)
1059                         shader_addline(buffer, "uniform sampler1DShadow %s_sampler%u;\n", prefix, i);
1060                     else
1061                         shader_addline(buffer, "uniform sampler1D %s_sampler%u;\n", prefix, i);
1062                     break;
1063                 case WINED3DSTT_2D:
1064                     tex_rect = version->type == WINED3D_SHADER_TYPE_PIXEL && (ps_args->np2_fixup & (1 << i));
1065                     tex_rect = tex_rect && gl_info->supported[ARB_TEXTURE_RECTANGLE];
1066                     if (shadow_sampler)
1067                     {
1068                         if (tex_rect)
1069                             shader_addline(buffer, "uniform sampler2DRectShadow %s_sampler%u;\n", prefix, i);
1070                         else
1071                             shader_addline(buffer, "uniform sampler2DShadow %s_sampler%u;\n", prefix, i);
1072                     }
1073                     else
1074                     {
1075                         if (tex_rect)
1076                             shader_addline(buffer, "uniform sampler2DRect %s_sampler%u;\n", prefix, i);
1077                         else
1078                             shader_addline(buffer, "uniform sampler2D %s_sampler%u;\n", prefix, i);
1079                     }
1080                     break;
1081                 case WINED3DSTT_CUBE:
1082                     if (shadow_sampler)
1083                         FIXME("Unsupported Cube shadow sampler.\n");
1084                     shader_addline(buffer, "uniform samplerCube %s_sampler%u;\n", prefix, i);
1085                     break;
1086                 case WINED3DSTT_VOLUME:
1087                     if (shadow_sampler)
1088                         FIXME("Unsupported 3D shadow sampler.\n");
1089                     shader_addline(buffer, "uniform sampler3D %s_sampler%u;\n", prefix, i);
1090                     break;
1091                 default:
1092                     shader_addline(buffer, "uniform unsupported_sampler %s_sampler%u;\n", prefix, i);
1093                     FIXME("Unrecognized sampler type: %#x\n", reg_maps->sampler_type[i]);
1094                     break;
1095             }
1096         }
1097     }
1098
1099     /* Declare uniforms for NP2 texcoord fixup:
1100      * This is NOT done inside the loop that declares the texture samplers
1101      * since the NP2 fixup code is currently only used for the GeforceFX
1102      * series and when forcing the ARB_npot extension off. Modern cards just
1103      * skip the code anyway, so put it inside a separate loop. */
1104     if (version->type == WINED3D_SHADER_TYPE_PIXEL && ps_args->np2_fixup)
1105     {
1106         struct ps_np2fixup_info *fixup = ctx_priv->cur_np2fixup_info;
1107         UINT cur = 0;
1108
1109         /* NP2/RECT textures in OpenGL use texcoords in the range [0,width]x[0,height]
1110          * while D3D has them in the (normalized) [0,1]x[0,1] range.
1111          * samplerNP2Fixup stores texture dimensions and is updated through
1112          * shader_glsl_load_np2fixup_constants when the sampler changes. */
1113
1114         for (i = 0; i < shader->limits.sampler; ++i)
1115         {
1116             if (reg_maps->sampler_type[i])
1117             {
1118                 if (!(ps_args->np2_fixup & (1 << i))) continue;
1119
1120                 if (WINED3DSTT_2D != reg_maps->sampler_type[i]) {
1121                     FIXME("Non-2D texture is flagged for NP2 texcoord fixup.\n");
1122                     continue;
1123                 }
1124
1125                 fixup->idx[i] = cur++;
1126             }
1127         }
1128
1129         fixup->num_consts = (cur + 1) >> 1;
1130         fixup->active = ps_args->np2_fixup;
1131         shader_addline(buffer, "uniform vec4 %s_samplerNP2Fixup[%u];\n", prefix, fixup->num_consts);
1132     }
1133
1134     /* Declare address variables */
1135     for (i = 0, map = reg_maps->address; map; map >>= 1, ++i)
1136     {
1137         if (map & 1) shader_addline(buffer, "ivec4 A%u;\n", i);
1138     }
1139
1140     /* Declare texture coordinate temporaries and initialize them */
1141     for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i)
1142     {
1143         if (map & 1) shader_addline(buffer, "vec4 T%u = gl_TexCoord[%u];\n", i, i);
1144     }
1145
1146     if (version->type == WINED3D_SHADER_TYPE_VERTEX)
1147     {
1148         /* Declare attributes. */
1149         for (i = 0, map = reg_maps->input_registers; map; map >>= 1, ++i)
1150         {
1151             if (map & 1)
1152                 shader_addline(buffer, "attribute vec4 %s_in%u;\n", prefix, i);
1153         }
1154
1155         shader_addline(buffer, "uniform vec4 posFixup;\n");
1156         shader_addline(buffer, "void order_ps_input(in vec4[%u]);\n", shader->limits.packed_output);
1157     }
1158     else if (version->type == WINED3D_SHADER_TYPE_GEOMETRY)
1159     {
1160         shader_addline(buffer, "varying in vec4 gs_in[][%u];\n", shader->limits.packed_input);
1161     }
1162     else if (version->type == WINED3D_SHADER_TYPE_PIXEL)
1163     {
1164         if (version->major >= 3)
1165         {
1166             UINT in_count = min(vec4_varyings(version->major, gl_info), shader->limits.packed_input);
1167
1168             if (use_vs(state))
1169                 shader_addline(buffer, "varying vec4 %s_in[%u];\n", prefix, in_count);
1170             else
1171                 /* TODO: Write a replacement shader for the fixed function
1172                  * vertex pipeline, so this isn't needed. For fixed function
1173                  * vertex processing + 3.0 pixel shader we need a separate
1174                  * function in the pixel shader that reads the fixed function
1175                  * color into the packed input registers. */
1176                 shader_addline(buffer, "vec4 %s_in[%u];\n", prefix, in_count);
1177         }
1178
1179         for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i)
1180         {
1181             if (!(map & 1))
1182                 continue;
1183
1184             shader_addline(buffer, "uniform mat2 bumpenv_mat%u;\n", i);
1185
1186             if (reg_maps->luminanceparams & (1 << i))
1187             {
1188                 shader_addline(buffer, "uniform float bumpenv_lum_scale%u;\n", i);
1189                 shader_addline(buffer, "uniform float bumpenv_lum_offset%u;\n", i);
1190                 extra_constants_needed++;
1191             }
1192
1193             extra_constants_needed++;
1194         }
1195
1196         if (ps_args->srgb_correction)
1197         {
1198             shader_addline(buffer, "const vec4 srgb_const0 = vec4(%.8e, %.8e, %.8e, %.8e);\n",
1199                     srgb_pow, srgb_mul_high, srgb_sub_high, srgb_mul_low);
1200             shader_addline(buffer, "const vec4 srgb_const1 = vec4(%.8e, 0.0, 0.0, 0.0);\n",
1201                     srgb_cmp);
1202         }
1203         if (reg_maps->vpos || reg_maps->usesdsy)
1204         {
1205             if (shader->limits.constant_float + extra_constants_needed
1206                     + 1 < gl_info->limits.glsl_ps_float_constants)
1207             {
1208                 shader_addline(buffer, "uniform vec4 ycorrection;\n");
1209                 extra_constants_needed++;
1210             }
1211             else
1212             {
1213                 /* This happens because we do not have proper tracking of the constant registers that are
1214                  * actually used, only the max limit of the shader version
1215                  */
1216                 FIXME("Cannot find a free uniform for vpos correction params\n");
1217                 shader_addline(buffer, "const vec4 ycorrection = vec4(%f, %f, 0.0, 0.0);\n",
1218                         context->render_offscreen ? 0.0f : fb->render_targets[0]->resource.height,
1219                         context->render_offscreen ? 1.0f : -1.0f);
1220             }
1221             shader_addline(buffer, "vec4 vpos;\n");
1222         }
1223     }
1224
1225     /* Declare output register temporaries */
1226     if (shader->limits.packed_output)
1227         shader_addline(buffer, "vec4 %s_out[%u];\n", prefix, shader->limits.packed_output);
1228
1229     /* Declare temporary variables */
1230     for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
1231     {
1232         if (map & 1) shader_addline(buffer, "vec4 R%u;\n", i);
1233     }
1234
1235     /* Declare loop registers aLx */
1236     if (version->major < 4)
1237     {
1238         for (i = 0; i < reg_maps->loop_depth; ++i)
1239         {
1240             shader_addline(buffer, "int aL%u;\n", i);
1241             shader_addline(buffer, "int tmpInt%u;\n", i);
1242         }
1243     }
1244
1245     /* Temporary variables for matrix operations */
1246     shader_addline(buffer, "vec4 tmp0;\n");
1247     shader_addline(buffer, "vec4 tmp1;\n");
1248
1249     if (!shader->load_local_constsF)
1250     {
1251         LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
1252         {
1253             const float *value;
1254             value = (const float *)lconst->value;
1255             shader_addline(buffer, "const vec4 %s_lc%u = vec4(%.8e, %.8e, %.8e, %.8e);\n",
1256                     prefix, lconst->idx, value[0], value[1], value[2], value[3]);
1257         }
1258     }
1259
1260     /* Start the main program. */
1261     shader_addline(buffer, "void main()\n{\n");
1262
1263     /* Direct3D applications expect integer vPos values, while OpenGL drivers
1264      * add approximately 0.5. This causes off-by-one problems as spotted by
1265      * the vPos d3d9 visual test. Unfortunately ATI cards do not add exactly
1266      * 0.5, but rather something like 0.49999999 or 0.50000001, which still
1267      * causes precision troubles when we just subtract 0.5.
1268      *
1269      * To deal with that, just floor() the position. This will eliminate the
1270      * fraction on all cards.
1271      *
1272      * TODO: Test how this behaves with multisampling.
1273      *
1274      * An advantage of floor is that it works even if the driver doesn't add
1275      * 0.5. It is somewhat questionable if 1.5, 2.5, ... are the proper values
1276      * to return in gl_FragCoord, even though coordinates specify the pixel
1277      * centers instead of the pixel corners. This code will behave correctly
1278      * on drivers that returns integer values. */
1279     if (version->type == WINED3D_SHADER_TYPE_PIXEL && reg_maps->vpos)
1280         shader_addline(buffer,
1281                 "vpos = floor(vec4(0, ycorrection[0], 0, 0) + gl_FragCoord * vec4(1, ycorrection[1], 1, 1));\n");
1282 }
1283
1284 /*****************************************************************************
1285  * Functions to generate GLSL strings from DirectX Shader bytecode begin here.
1286  *
1287  * For more information, see http://wiki.winehq.org/DirectX-Shaders
1288  ****************************************************************************/
1289
1290 /* Prototypes */
1291 static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
1292         const struct wined3d_shader_src_param *wined3d_src, DWORD mask, struct glsl_src_param *glsl_src);
1293
/** Multiplier prefixes for opcode shift modifiers: each entry is the GLSL
 * text that scales the result by the corresponding amount.
 *
 * The table has 16 entries. Entries 6 to 11 are empty strings, so no factor
 * is emitted for those shift values. */
static const char * const shift_glsl_tab[] =
{
    /* Multiplying shifts. */
    "",             /*  0: none */
    "2.0 * ",       /*  1: x2   */
    "4.0 * ",       /*  2: x4   */
    "8.0 * ",       /*  3: x8   */
    "16.0 * ",      /*  4: x16  */
    "32.0 * ",      /*  5: x32  */
    "",             /*  6: x64  */
    "",             /*  7: x128 */
    /* Dividing shifts. */
    "",             /*  8: d256 */
    "",             /*  9: d128 */
    "",             /* 10: d64  */
    "",             /* 11: d32  */
    "0.0625 * ",    /* 12: d16  */
    "0.125 * ",     /* 13: d8   */
    "0.25 * ",      /* 14: d4   */
    "0.5 * ",       /* 15: d2   */
};
1313
1314 /* Generate a GLSL parameter that does the input modifier computation and return the input register/mask to use */
1315 static void shader_glsl_gen_modifier(enum wined3d_shader_src_modifier src_modifier,
1316         const char *in_reg, const char *in_regswizzle, char *out_str)
1317 {
1318     out_str[0] = 0;
1319
1320     switch (src_modifier)
1321     {
1322     case WINED3DSPSM_DZ: /* Need to handle this in the instructions itself (texld & texcrd). */
1323     case WINED3DSPSM_DW:
1324     case WINED3DSPSM_NONE:
1325         sprintf(out_str, "%s%s", in_reg, in_regswizzle);
1326         break;
1327     case WINED3DSPSM_NEG:
1328         sprintf(out_str, "-%s%s", in_reg, in_regswizzle);
1329         break;
1330     case WINED3DSPSM_NOT:
1331         sprintf(out_str, "!%s%s", in_reg, in_regswizzle);
1332         break;
1333     case WINED3DSPSM_BIAS:
1334         sprintf(out_str, "(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
1335         break;
1336     case WINED3DSPSM_BIASNEG:
1337         sprintf(out_str, "-(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
1338         break;
1339     case WINED3DSPSM_SIGN:
1340         sprintf(out_str, "(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
1341         break;
1342     case WINED3DSPSM_SIGNNEG:
1343         sprintf(out_str, "-(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
1344         break;
1345     case WINED3DSPSM_COMP:
1346         sprintf(out_str, "(1.0 - %s%s)", in_reg, in_regswizzle);
1347         break;
1348     case WINED3DSPSM_X2:
1349         sprintf(out_str, "(2.0 * %s%s)", in_reg, in_regswizzle);
1350         break;
1351     case WINED3DSPSM_X2NEG:
1352         sprintf(out_str, "-(2.0 * %s%s)", in_reg, in_regswizzle);
1353         break;
1354     case WINED3DSPSM_ABS:
1355         sprintf(out_str, "abs(%s%s)", in_reg, in_regswizzle);
1356         break;
1357     case WINED3DSPSM_ABSNEG:
1358         sprintf(out_str, "-abs(%s%s)", in_reg, in_regswizzle);
1359         break;
1360     default:
1361         FIXME("Unhandled modifier %u\n", src_modifier);
1362         sprintf(out_str, "%s%s", in_reg, in_regswizzle);
1363     }
1364 }
1365
1366 /** Writes the GLSL variable name that corresponds to the register that the
1367  * DX opcode parameter is trying to access */
1368 static void shader_glsl_get_register_name(const struct wined3d_shader_register *reg,
1369         char *register_name, BOOL *is_color, const struct wined3d_shader_instruction *ins)
1370 {
1371     /* oPos, oFog and oPts in D3D */
1372     static const char * const hwrastout_reg_names[] = {"vs_out[10]", "vs_out[11].x", "vs_out[11].y"};
1373
1374     const struct wined3d_shader *shader = ins->ctx->shader;
1375     const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
1376     const struct wined3d_shader_version *version = &reg_maps->shader_version;
1377     const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
1378     const char *prefix = shader_glsl_get_prefix(version->type);
1379     struct glsl_src_param rel_param0, rel_param1;
1380
1381     if (reg->idx[0].offset != ~0U && reg->idx[0].rel_addr)
1382         shader_glsl_add_src_param(ins, reg->idx[0].rel_addr, WINED3DSP_WRITEMASK_0, &rel_param0);
1383     if (reg->idx[1].offset != ~0U && reg->idx[1].rel_addr)
1384         shader_glsl_add_src_param(ins, reg->idx[1].rel_addr, WINED3DSP_WRITEMASK_0, &rel_param1);
1385     *is_color = FALSE;
1386
1387     switch (reg->type)
1388     {
1389         case WINED3DSPR_TEMP:
1390             sprintf(register_name, "R%u", reg->idx[0].offset);
1391             break;
1392
1393         case WINED3DSPR_INPUT:
1394             /* vertex shaders */
1395             if (version->type == WINED3D_SHADER_TYPE_VERTEX)
1396             {
1397                 struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
1398                 if (priv->cur_vs_args->swizzle_map & (1 << reg->idx[0].offset))
1399                     *is_color = TRUE;
1400                 sprintf(register_name, "%s_in%u", prefix, reg->idx[0].offset);
1401                 break;
1402             }
1403
1404             if (version->type == WINED3D_SHADER_TYPE_GEOMETRY)
1405             {
1406                 if (reg->idx[0].rel_addr)
1407                 {
1408                     if (reg->idx[1].rel_addr)
1409                         sprintf(register_name, "gs_in[%s + %u][%s + %u]",
1410                                 rel_param0.param_str, reg->idx[0].offset, rel_param1.param_str, reg->idx[1].offset);
1411                     else
1412                         sprintf(register_name, "gs_in[%s + %u][%u]",
1413                                 rel_param0.param_str, reg->idx[0].offset, reg->idx[1].offset);
1414                 }
1415                 else if (reg->idx[1].rel_addr)
1416                     sprintf(register_name, "gs_in[%u][%s + %u]",
1417                             reg->idx[0].offset, rel_param1.param_str, reg->idx[1].offset);
1418                 else
1419                     sprintf(register_name, "gs_in[%u][%u]", reg->idx[0].offset, reg->idx[1].offset);
1420                 break;
1421             }
1422
1423             /* pixel shaders >= 3.0 */
1424             if (version->major >= 3)
1425             {
1426                 DWORD idx = shader->u.ps.input_reg_map[reg->idx[0].offset];
1427                 unsigned int in_count = vec4_varyings(version->major, gl_info);
1428
1429                 if (reg->idx[0].rel_addr)
1430                 {
1431                     /* Removing a + 0 would be an obvious optimization, but
1432                      * OS X doesn't see the NOP operation there. */
1433                     if (idx)
1434                     {
1435                         if (shader->u.ps.declared_in_count > in_count)
1436                         {
1437                             sprintf(register_name,
1438                                     "((%s + %u) > %u ? (%s + %u) > %u ? gl_SecondaryColor : gl_Color : %s_in[%s + %u])",
1439                                     rel_param0.param_str, idx, in_count - 1, rel_param0.param_str, idx, in_count,
1440                                     prefix, rel_param0.param_str, idx);
1441                         }
1442                         else
1443                         {
1444                             sprintf(register_name, "%s_in[%s + %u]", prefix, rel_param0.param_str, idx);
1445                         }
1446                     }
1447                     else
1448                     {
1449                         if (shader->u.ps.declared_in_count > in_count)
1450                         {
1451                             sprintf(register_name, "((%s) > %u ? (%s) > %u ? gl_SecondaryColor : gl_Color : %s_in[%s])",
1452                                     rel_param0.param_str, in_count - 1, rel_param0.param_str, in_count,
1453                                     prefix, rel_param0.param_str);
1454                         }
1455                         else
1456                         {
1457                             sprintf(register_name, "%s_in[%s]", prefix, rel_param0.param_str);
1458                         }
1459                     }
1460                 }
1461                 else
1462                 {
1463                     if (idx == in_count) sprintf(register_name, "gl_Color");
1464                     else if (idx == in_count + 1) sprintf(register_name, "gl_SecondaryColor");
1465                     else sprintf(register_name, "%s_in[%u]", prefix, idx);
1466                 }
1467             }
1468             else
1469             {
1470                 if (!reg->idx[0].offset)
1471                     strcpy(register_name, "gl_Color");
1472                 else
1473                     strcpy(register_name, "gl_SecondaryColor");
1474                 break;
1475             }
1476             break;
1477
1478         case WINED3DSPR_CONST:
1479             {
1480                 /* Relative addressing */
1481                 if (reg->idx[0].rel_addr)
1482                 {
1483                     if (reg->idx[0].offset)
1484                         sprintf(register_name, "%s_c[%s + %u]", prefix, rel_param0.param_str, reg->idx[0].offset);
1485                     else
1486                         sprintf(register_name, "%s_c[%s]", prefix, rel_param0.param_str);
1487                 }
1488                 else
1489                 {
1490                     if (shader_constant_is_local(shader, reg->idx[0].offset))
1491                         sprintf(register_name, "%s_lc%u", prefix, reg->idx[0].offset);
1492                     else
1493                         sprintf(register_name, "%s_c[%u]", prefix, reg->idx[0].offset);
1494                 }
1495             }
1496             break;
1497
1498         case WINED3DSPR_CONSTINT:
1499             sprintf(register_name, "%s_i[%u]", prefix, reg->idx[0].offset);
1500             break;
1501
1502         case WINED3DSPR_CONSTBOOL:
1503             sprintf(register_name, "%s_b[%u]", prefix, reg->idx[0].offset);
1504             break;
1505
1506         case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
1507             if (version->type == WINED3D_SHADER_TYPE_PIXEL)
1508                 sprintf(register_name, "T%u", reg->idx[0].offset);
1509             else
1510                 sprintf(register_name, "A%u", reg->idx[0].offset);
1511             break;
1512
1513         case WINED3DSPR_LOOP:
1514             sprintf(register_name, "aL%u", ins->ctx->loop_state->current_reg - 1);
1515             break;
1516
1517         case WINED3DSPR_SAMPLER:
1518             sprintf(register_name, "%s_sampler%u", prefix, reg->idx[0].offset);
1519             break;
1520
1521         case WINED3DSPR_COLOROUT:
1522             if (reg->idx[0].offset >= gl_info->limits.buffers)
1523                 WARN("Write to render target %u, only %d supported.\n",
1524                         reg->idx[0].offset, gl_info->limits.buffers);
1525
1526             sprintf(register_name, "gl_FragData[%u]", reg->idx[0].offset);
1527             break;
1528
1529         case WINED3DSPR_RASTOUT:
1530             sprintf(register_name, "%s", hwrastout_reg_names[reg->idx[0].offset]);
1531             break;
1532
1533         case WINED3DSPR_DEPTHOUT:
1534             sprintf(register_name, "gl_FragDepth");
1535             break;
1536
1537         case WINED3DSPR_ATTROUT:
1538             if (!reg->idx[0].offset)
1539                 sprintf(register_name, "%s_out[8]", prefix);
1540             else
1541                 sprintf(register_name, "%s_out[9]", prefix);
1542             break;
1543
1544         case WINED3DSPR_TEXCRDOUT:
1545             /* Vertex shaders >= 3.0: WINED3DSPR_OUTPUT */
1546             sprintf(register_name, "%s_out[%u]", prefix, reg->idx[0].offset);
1547             break;
1548
1549         case WINED3DSPR_MISCTYPE:
1550             if (!reg->idx[0].offset)
1551             {
1552                 /* vPos */
1553                 sprintf(register_name, "vpos");
1554             }
1555             else if (reg->idx[0].offset == 1)
1556             {
1557                 /* Note that gl_FrontFacing is a bool, while vFace is
1558                  * a float for which the sign determines front/back */
1559                 sprintf(register_name, "(gl_FrontFacing ? 1.0 : -1.0)");
1560             }
1561             else
1562             {
1563                 FIXME("Unhandled misctype register %u.\n", reg->idx[0].offset);
1564                 sprintf(register_name, "unrecognized_register");
1565             }
1566             break;
1567
1568         case WINED3DSPR_IMMCONST:
1569             switch (reg->immconst_type)
1570             {
1571                 case WINED3D_IMMCONST_SCALAR:
1572                     switch (reg->data_type)
1573                     {
1574                         case WINED3D_DATA_FLOAT:
1575                             sprintf(register_name, "%.8e", *(const float *)reg->immconst_data);
1576                             break;
1577                         case WINED3D_DATA_INT:
1578                             sprintf(register_name, "%#x", reg->immconst_data[0]);
1579                             break;
1580                         case WINED3D_DATA_RESOURCE:
1581                         case WINED3D_DATA_SAMPLER:
1582                         case WINED3D_DATA_UINT:
1583                             sprintf(register_name, "%#xu", reg->immconst_data[0]);
1584                             break;
1585                         default:
1586                             sprintf(register_name, "<unhandled data type %#x>", reg->data_type);
1587                             break;
1588                     }
1589                     break;
1590
1591                 case WINED3D_IMMCONST_VEC4:
1592                     switch (reg->data_type)
1593                     {
1594                         case WINED3D_DATA_FLOAT:
1595                             sprintf(register_name, "vec4(%.8e, %.8e, %.8e, %.8e)",
1596                                     *(const float *)&reg->immconst_data[0], *(const float *)&reg->immconst_data[1],
1597                                     *(const float *)&reg->immconst_data[2], *(const float *)&reg->immconst_data[3]);
1598                             break;
1599                         case WINED3D_DATA_INT:
1600                             sprintf(register_name, "ivec4(%#x, %#x, %#x, %#x)",
1601                                     reg->immconst_data[0], reg->immconst_data[1],
1602                                     reg->immconst_data[2], reg->immconst_data[3]);
1603                             break;
1604                         case WINED3D_DATA_RESOURCE:
1605                         case WINED3D_DATA_SAMPLER:
1606                         case WINED3D_DATA_UINT:
1607                             sprintf(register_name, "uvec4(%#xu, %#xu, %#xu, %#xu)",
1608                                     reg->immconst_data[0], reg->immconst_data[1],
1609                                     reg->immconst_data[2], reg->immconst_data[3]);
1610                             break;
1611                         default:
1612                             sprintf(register_name, "<unhandled data type %#x>", reg->data_type);
1613                             break;
1614                     }
1615                     break;
1616
1617                 default:
1618                     FIXME("Unhandled immconst type %#x\n", reg->immconst_type);
1619                     sprintf(register_name, "<unhandled_immconst_type %#x>", reg->immconst_type);
1620             }
1621             break;
1622
1623         case WINED3DSPR_CONSTBUFFER:
1624             if (reg->idx[1].rel_addr)
1625                 sprintf(register_name, "%s_cb%u[%s + %u]",
1626                         prefix, reg->idx[0].offset, rel_param1.param_str, reg->idx[1].offset);
1627             else
1628                 sprintf(register_name, "%s_cb%u[%u]", prefix, reg->idx[0].offset, reg->idx[1].offset);
1629             break;
1630
1631         case WINED3DSPR_PRIMID:
1632             sprintf(register_name, "uint(gl_PrimitiveIDIn)");
1633             break;
1634
1635         default:
1636             FIXME("Unhandled register type %#x.\n", reg->type);
1637             sprintf(register_name, "unrecognized_register");
1638             break;
1639     }
1640 }
1641
1642 static void shader_glsl_write_mask_to_str(DWORD write_mask, char *str)
1643 {
1644     *str++ = '.';
1645     if (write_mask & WINED3DSP_WRITEMASK_0) *str++ = 'x';
1646     if (write_mask & WINED3DSP_WRITEMASK_1) *str++ = 'y';
1647     if (write_mask & WINED3DSP_WRITEMASK_2) *str++ = 'z';
1648     if (write_mask & WINED3DSP_WRITEMASK_3) *str++ = 'w';
1649     *str = '\0';
1650 }
1651
1652 /* Get the GLSL write mask for the destination register */
1653 static DWORD shader_glsl_get_write_mask(const struct wined3d_shader_dst_param *param, char *write_mask)
1654 {
1655     DWORD mask = param->write_mask;
1656
1657     if (shader_is_scalar(&param->reg))
1658     {
1659         mask = WINED3DSP_WRITEMASK_0;
1660         *write_mask = '\0';
1661     }
1662     else
1663     {
1664         shader_glsl_write_mask_to_str(mask, write_mask);
1665     }
1666
1667     return mask;
1668 }
1669
1670 static unsigned int shader_glsl_get_write_mask_size(DWORD write_mask) {
1671     unsigned int size = 0;
1672
1673     if (write_mask & WINED3DSP_WRITEMASK_0) ++size;
1674     if (write_mask & WINED3DSP_WRITEMASK_1) ++size;
1675     if (write_mask & WINED3DSP_WRITEMASK_2) ++size;
1676     if (write_mask & WINED3DSP_WRITEMASK_3) ++size;
1677
1678     return size;
1679 }
1680
1681 static void shader_glsl_swizzle_to_str(const DWORD swizzle, BOOL fixup, DWORD mask, char *str)
1682 {
1683     /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
1684      * but addressed as "rgba". To fix this we need to swap the register's x
1685      * and z components. */
1686     const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
1687
1688     *str++ = '.';
1689     /* swizzle bits fields: wwzzyyxx */
1690     if (mask & WINED3DSP_WRITEMASK_0) *str++ = swizzle_chars[swizzle & 0x03];
1691     if (mask & WINED3DSP_WRITEMASK_1) *str++ = swizzle_chars[(swizzle >> 2) & 0x03];
1692     if (mask & WINED3DSP_WRITEMASK_2) *str++ = swizzle_chars[(swizzle >> 4) & 0x03];
1693     if (mask & WINED3DSP_WRITEMASK_3) *str++ = swizzle_chars[(swizzle >> 6) & 0x03];
1694     *str = '\0';
1695 }
1696
1697 static void shader_glsl_get_swizzle(const struct wined3d_shader_src_param *param,
1698         BOOL fixup, DWORD mask, char *swizzle_str)
1699 {
1700     if (shader_is_scalar(&param->reg))
1701         *swizzle_str = '\0';
1702     else
1703         shader_glsl_swizzle_to_str(param->swizzle, fixup, mask, swizzle_str);
1704 }
1705
1706 /* From a given parameter token, generate the corresponding GLSL string.
1707  * Also, return the actual register name and swizzle in case the
1708  * caller needs this information as well. */
1709 static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
1710         const struct wined3d_shader_src_param *wined3d_src, DWORD mask, struct glsl_src_param *glsl_src)
1711 {
1712     BOOL is_color = FALSE;
1713     char swizzle_str[6];
1714
1715     glsl_src->reg_name[0] = '\0';
1716     glsl_src->param_str[0] = '\0';
1717     swizzle_str[0] = '\0';
1718
1719     shader_glsl_get_register_name(&wined3d_src->reg, glsl_src->reg_name, &is_color, ins);
1720     shader_glsl_get_swizzle(wined3d_src, is_color, mask, swizzle_str);
1721
1722     if (wined3d_src->reg.type == WINED3DSPR_IMMCONST || wined3d_src->reg.type == WINED3DSPR_PRIMID)
1723     {
1724         shader_glsl_gen_modifier(wined3d_src->modifiers, glsl_src->reg_name, swizzle_str, glsl_src->param_str);
1725     }
1726     else
1727     {
1728         char param_str[200];
1729
1730         shader_glsl_gen_modifier(wined3d_src->modifiers, glsl_src->reg_name, swizzle_str, param_str);
1731
1732         switch (wined3d_src->reg.data_type)
1733         {
1734             case WINED3D_DATA_FLOAT:
1735                 sprintf(glsl_src->param_str, "%s", param_str);
1736                 break;
1737             case WINED3D_DATA_INT:
1738                 sprintf(glsl_src->param_str, "floatBitsToInt(%s)", param_str);
1739                 break;
1740             case WINED3D_DATA_RESOURCE:
1741             case WINED3D_DATA_SAMPLER:
1742             case WINED3D_DATA_UINT:
1743                 sprintf(glsl_src->param_str, "floatBitsToUint(%s)", param_str);
1744                 break;
1745             default:
1746                 FIXME("Unhandled data type %#x.\n", wined3d_src->reg.data_type);
1747                 sprintf(glsl_src->param_str, "%s", param_str);
1748                 break;
1749         }
1750     }
1751 }
1752
1753 /* From a given parameter token, generate the corresponding GLSL string.
1754  * Also, return the actual register name and swizzle in case the
1755  * caller needs this information as well. */
1756 static DWORD shader_glsl_add_dst_param(const struct wined3d_shader_instruction *ins,
1757         const struct wined3d_shader_dst_param *wined3d_dst, struct glsl_dst_param *glsl_dst)
1758 {
1759     BOOL is_color = FALSE;
1760
1761     glsl_dst->mask_str[0] = '\0';
1762     glsl_dst->reg_name[0] = '\0';
1763
1764     shader_glsl_get_register_name(&wined3d_dst->reg, glsl_dst->reg_name, &is_color, ins);
1765     return shader_glsl_get_write_mask(wined3d_dst, glsl_dst->mask_str);
1766 }
1767
1768 /* Append the destination part of the instruction to the buffer, return the effective write mask */
1769 static DWORD shader_glsl_append_dst_ext(struct wined3d_shader_buffer *buffer,
1770         const struct wined3d_shader_instruction *ins, const struct wined3d_shader_dst_param *dst)
1771 {
1772     struct glsl_dst_param glsl_dst;
1773     DWORD mask;
1774
1775     if ((mask = shader_glsl_add_dst_param(ins, dst, &glsl_dst)))
1776     {
1777         switch (dst->reg.data_type)
1778         {
1779             case WINED3D_DATA_FLOAT:
1780                 shader_addline(buffer, "%s%s = %s(",
1781                         glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
1782                 break;
1783             case WINED3D_DATA_INT:
1784                 shader_addline(buffer, "%s%s = %sintBitsToFloat(",
1785                         glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
1786                 break;
1787             case WINED3D_DATA_RESOURCE:
1788             case WINED3D_DATA_SAMPLER:
1789             case WINED3D_DATA_UINT:
1790                 shader_addline(buffer, "%s%s = %suintBitsToFloat(",
1791                         glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
1792                 break;
1793             default:
1794                 FIXME("Unhandled data type %#x.\n", dst->reg.data_type);
1795                 shader_addline(buffer, "%s%s = %s(",
1796                         glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
1797                 break;
1798         }
1799     }
1800
1801     return mask;
1802 }
1803
1804 /* Append the destination part of the instruction to the buffer, return the effective write mask */
1805 static DWORD shader_glsl_append_dst(struct wined3d_shader_buffer *buffer, const struct wined3d_shader_instruction *ins)
1806 {
1807     return shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0]);
1808 }
1809
1810 /** Process GLSL instruction modifiers */
1811 static void shader_glsl_add_instruction_modifiers(const struct wined3d_shader_instruction *ins)
1812 {
1813     struct glsl_dst_param dst_param;
1814     DWORD modifiers;
1815
1816     if (!ins->dst_count) return;
1817
1818     modifiers = ins->dst[0].modifiers;
1819     if (!modifiers) return;
1820
1821     shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
1822
1823     if (modifiers & WINED3DSPDM_SATURATE)
1824     {
1825         /* _SAT means to clamp the value of the register to between 0 and 1 */
1826         shader_addline(ins->ctx->buffer, "%s%s = clamp(%s%s, 0.0, 1.0);\n", dst_param.reg_name,
1827                 dst_param.mask_str, dst_param.reg_name, dst_param.mask_str);
1828     }
1829
1830     if (modifiers & WINED3DSPDM_MSAMPCENTROID)
1831     {
1832         FIXME("_centroid modifier not handled\n");
1833     }
1834
1835     if (modifiers & WINED3DSPDM_PARTIALPRECISION)
1836     {
1837         /* MSDN says this modifier can be safely ignored, so that's what we'll do. */
1838     }
1839 }
1840
1841 static const char *shader_glsl_get_rel_op(enum wined3d_shader_rel_op op)
1842 {
1843     switch (op)
1844     {
1845         case WINED3D_SHADER_REL_OP_GT: return ">";
1846         case WINED3D_SHADER_REL_OP_EQ: return "==";
1847         case WINED3D_SHADER_REL_OP_GE: return ">=";
1848         case WINED3D_SHADER_REL_OP_LT: return "<";
1849         case WINED3D_SHADER_REL_OP_NE: return "!=";
1850         case WINED3D_SHADER_REL_OP_LE: return "<=";
1851         default:
1852             FIXME("Unrecognized operator %#x.\n", op);
1853             return "(\?\?)";
1854     }
1855 }
1856
1857 static void shader_glsl_get_sample_function(const struct wined3d_shader_context *ctx,
1858         DWORD sampler_idx, DWORD flags, struct glsl_sample_function *sample_function)
1859 {
1860     enum wined3d_sampler_texture_type sampler_type = ctx->reg_maps->sampler_type[sampler_idx];
1861     const struct wined3d_gl_info *gl_info = ctx->gl_info;
1862     BOOL shadow = ctx->reg_maps->shader_version.type == WINED3D_SHADER_TYPE_PIXEL
1863             && (((const struct shader_glsl_ctx_priv *)ctx->backend_data)->cur_ps_args->shadow & (1 << sampler_idx));
1864     BOOL projected = flags & WINED3D_GLSL_SAMPLE_PROJECTED;
1865     BOOL texrect = flags & WINED3D_GLSL_SAMPLE_NPOT && gl_info->supported[ARB_TEXTURE_RECTANGLE];
1866     BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD;
1867     BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD;
1868
1869     /* Note that there's no such thing as a projected cube texture. */
1870     switch(sampler_type) {
1871         case WINED3DSTT_1D:
1872             if (shadow)
1873             {
1874                 if (lod)
1875                 {
1876                     sample_function->name = projected ? "shadow1DProjLod" : "shadow1DLod";
1877                 }
1878                 else if (grad)
1879                 {
1880                     if (gl_info->supported[EXT_GPU_SHADER4])
1881                         sample_function->name = projected ? "shadow1DProjGrad" : "shadow1DGrad";
1882                     else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1883                         sample_function->name = projected ? "shadow1DProjGradARB" : "shadow1DGradARB";
1884                     else
1885                     {
1886                         FIXME("Unsupported 1D shadow grad function.\n");
1887                         sample_function->name = "unsupported1DGrad";
1888                     }
1889                 }
1890                 else
1891                 {
1892                     sample_function->name = projected ? "shadow1DProj" : "shadow1D";
1893                 }
1894                 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1;
1895             }
1896             else
1897             {
1898                 if (lod)
1899                 {
1900                     sample_function->name = projected ? "texture1DProjLod" : "texture1DLod";
1901                 }
1902                 else if (grad)
1903                 {
1904                     if (gl_info->supported[EXT_GPU_SHADER4])
1905                         sample_function->name = projected ? "texture1DProjGrad" : "texture1DGrad";
1906                     else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1907                         sample_function->name = projected ? "texture1DProjGradARB" : "texture1DGradARB";
1908                     else
1909                     {
1910                         FIXME("Unsupported 1D grad function.\n");
1911                         sample_function->name = "unsupported1DGrad";
1912                     }
1913                 }
1914                 else
1915                 {
1916                     sample_function->name = projected ? "texture1DProj" : "texture1D";
1917                 }
1918                 sample_function->coord_mask = WINED3DSP_WRITEMASK_0;
1919             }
1920             break;
1921
1922         case WINED3DSTT_2D:
1923             if (shadow)
1924             {
1925                 if (texrect)
1926                 {
1927                     if (lod)
1928                     {
1929                         sample_function->name = projected ? "shadow2DRectProjLod" : "shadow2DRectLod";
1930                     }
1931                     else if (grad)
1932                     {
1933                         if (gl_info->supported[EXT_GPU_SHADER4])
1934                             sample_function->name = projected ? "shadow2DRectProjGrad" : "shadow2DRectGrad";
1935                         else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1936                             sample_function->name = projected ? "shadow2DRectProjGradARB" : "shadow2DRectGradARB";
1937                         else
1938                         {
1939                             FIXME("Unsupported RECT shadow grad function.\n");
1940                             sample_function->name = "unsupported2DRectGrad";
1941                         }
1942                     }
1943                     else
1944                     {
1945                         sample_function->name = projected ? "shadow2DRectProj" : "shadow2DRect";
1946                     }
1947                 }
1948                 else
1949                 {
1950                     if (lod)
1951                     {
1952                         sample_function->name = projected ? "shadow2DProjLod" : "shadow2DLod";
1953                     }
1954                     else if (grad)
1955                     {
1956                         if (gl_info->supported[EXT_GPU_SHADER4])
1957                             sample_function->name = projected ? "shadow2DProjGrad" : "shadow2DGrad";
1958                         else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1959                             sample_function->name = projected ? "shadow2DProjGradARB" : "shadow2DGradARB";
1960                         else
1961                         {
1962                             FIXME("Unsupported 2D shadow grad function.\n");
1963                             sample_function->name = "unsupported2DGrad";
1964                         }
1965                     }
1966                     else
1967                     {
1968                         sample_function->name = projected ? "shadow2DProj" : "shadow2D";
1969                     }
1970                 }
1971                 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1972             }
1973             else
1974             {
1975                 if (texrect)
1976                 {
1977                     if (lod)
1978                     {
1979                         sample_function->name = projected ? "texture2DRectProjLod" : "texture2DRectLod";
1980                     }
1981                     else if (grad)
1982                     {
1983                         if (gl_info->supported[EXT_GPU_SHADER4])
1984                             sample_function->name = projected ? "texture2DRectProjGrad" : "texture2DRectGrad";
1985                         else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1986                             sample_function->name = projected ? "texture2DRectProjGradARB" : "texture2DRectGradARB";
1987                         else
1988                         {
1989                             FIXME("Unsupported RECT grad function.\n");
1990                             sample_function->name = "unsupported2DRectGrad";
1991                         }
1992                     }
1993                     else
1994                     {
1995                         sample_function->name = projected ? "texture2DRectProj" : "texture2DRect";
1996                     }
1997                 }
1998                 else
1999                 {
2000                     if (lod)
2001                     {
2002                         sample_function->name = projected ? "texture2DProjLod" : "texture2DLod";
2003                     }
2004                     else if (grad)
2005                     {
2006                         if (gl_info->supported[EXT_GPU_SHADER4])
2007                             sample_function->name = projected ? "texture2DProjGrad" : "texture2DGrad";
2008                         else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
2009                             sample_function->name = projected ? "texture2DProjGradARB" : "texture2DGradARB";
2010                         else
2011                         {
2012                             FIXME("Unsupported 2D grad function.\n");
2013                             sample_function->name = "unsupported2DGrad";
2014                         }
2015                     }
2016                     else
2017                     {
2018                         sample_function->name = projected ? "texture2DProj" : "texture2D";
2019                     }
2020                 }
2021                 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1;
2022             }
2023             break;
2024
2025         case WINED3DSTT_CUBE:
2026             if (shadow)
2027             {
2028                 FIXME("Unsupported Cube shadow function.\n");
2029                 sample_function->name = "unsupportedCubeShadow";
2030                 sample_function->coord_mask = 0;
2031             }
2032             else
2033             {
2034                 if (lod)
2035                 {
2036                     sample_function->name = "textureCubeLod";
2037                 }
2038                 else if (grad)
2039                 {
2040                     if (gl_info->supported[EXT_GPU_SHADER4])
2041                         sample_function->name = "textureCubeGrad";
2042                     else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
2043                         sample_function->name = "textureCubeGradARB";
2044                     else
2045                     {
2046                         FIXME("Unsupported Cube grad function.\n");
2047                         sample_function->name = "unsupportedCubeGrad";
2048                     }
2049                 }
2050                 else
2051                 {
2052                     sample_function->name = "textureCube";
2053                 }
2054                 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2055             }
2056             break;
2057
2058         case WINED3DSTT_VOLUME:
2059             if (shadow)
2060             {
2061                 FIXME("Unsupported 3D shadow function.\n");
2062                 sample_function->name = "unsupported3DShadow";
2063                 sample_function->coord_mask = 0;
2064             }
2065             else
2066             {
2067                 if (lod)
2068                 {
2069                     sample_function->name = projected ? "texture3DProjLod" : "texture3DLod";
2070                 }
2071                 else  if (grad)
2072                 {
2073                     if (gl_info->supported[EXT_GPU_SHADER4])
2074                         sample_function->name = projected ? "texture3DProjGrad" : "texture3DGrad";
2075                     else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
2076                         sample_function->name = projected ? "texture3DProjGradARB" : "texture3DGradARB";
2077                     else
2078                     {
2079                         FIXME("Unsupported 3D grad function.\n");
2080                         sample_function->name = "unsupported3DGrad";
2081                     }
2082                 }
2083                 else
2084                 {
2085                     sample_function->name = projected ? "texture3DProj" : "texture3D";
2086                 }
2087                 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2088             }
2089             break;
2090
2091         default:
2092             sample_function->name = "";
2093             sample_function->coord_mask = 0;
2094             FIXME("Unrecognized sampler type: %#x;\n", sampler_type);
2095             break;
2096     }
2097 }
2098
2099 static void shader_glsl_append_fixup_arg(char *arguments, const char *reg_name,
2100         BOOL sign_fixup, enum fixup_channel_source channel_source)
2101 {
2102     switch(channel_source)
2103     {
2104         case CHANNEL_SOURCE_ZERO:
2105             strcat(arguments, "0.0");
2106             break;
2107
2108         case CHANNEL_SOURCE_ONE:
2109             strcat(arguments, "1.0");
2110             break;
2111
2112         case CHANNEL_SOURCE_X:
2113             strcat(arguments, reg_name);
2114             strcat(arguments, ".x");
2115             break;
2116
2117         case CHANNEL_SOURCE_Y:
2118             strcat(arguments, reg_name);
2119             strcat(arguments, ".y");
2120             break;
2121
2122         case CHANNEL_SOURCE_Z:
2123             strcat(arguments, reg_name);
2124             strcat(arguments, ".z");
2125             break;
2126
2127         case CHANNEL_SOURCE_W:
2128             strcat(arguments, reg_name);
2129             strcat(arguments, ".w");
2130             break;
2131
2132         default:
2133             FIXME("Unhandled channel source %#x\n", channel_source);
2134             strcat(arguments, "undefined");
2135             break;
2136     }
2137
2138     if (sign_fixup) strcat(arguments, " * 2.0 - 1.0");
2139 }
2140
2141 static void shader_glsl_color_correction_ext(struct wined3d_shader_buffer *buffer,
2142         const char *reg_name, DWORD mask, struct color_fixup_desc fixup)
2143 {
2144     unsigned int mask_size, remaining;
2145     DWORD fixup_mask = 0;
2146     char arguments[256];
2147     char mask_str[6];
2148
2149     if (fixup.x_sign_fixup || fixup.x_source != CHANNEL_SOURCE_X) fixup_mask |= WINED3DSP_WRITEMASK_0;
2150     if (fixup.y_sign_fixup || fixup.y_source != CHANNEL_SOURCE_Y) fixup_mask |= WINED3DSP_WRITEMASK_1;
2151     if (fixup.z_sign_fixup || fixup.z_source != CHANNEL_SOURCE_Z) fixup_mask |= WINED3DSP_WRITEMASK_2;
2152     if (fixup.w_sign_fixup || fixup.w_source != CHANNEL_SOURCE_W) fixup_mask |= WINED3DSP_WRITEMASK_3;
2153     if (!(mask &= fixup_mask))
2154         return;
2155
2156     if (is_complex_fixup(fixup))
2157     {
2158         enum complex_fixup complex_fixup = get_complex_fixup(fixup);
2159         FIXME("Complex fixup (%#x) not supported\n",complex_fixup);
2160         return;
2161     }
2162
2163     shader_glsl_write_mask_to_str(mask, mask_str);
2164     mask_size = shader_glsl_get_write_mask_size(mask);
2165
2166     arguments[0] = '\0';
2167     remaining = mask_size;
2168     if (mask & WINED3DSP_WRITEMASK_0)
2169     {
2170         shader_glsl_append_fixup_arg(arguments, reg_name, fixup.x_sign_fixup, fixup.x_source);
2171         if (--remaining) strcat(arguments, ", ");
2172     }
2173     if (mask & WINED3DSP_WRITEMASK_1)
2174     {
2175         shader_glsl_append_fixup_arg(arguments, reg_name, fixup.y_sign_fixup, fixup.y_source);
2176         if (--remaining) strcat(arguments, ", ");
2177     }
2178     if (mask & WINED3DSP_WRITEMASK_2)
2179     {
2180         shader_glsl_append_fixup_arg(arguments, reg_name, fixup.z_sign_fixup, fixup.z_source);
2181         if (--remaining) strcat(arguments, ", ");
2182     }
2183     if (mask & WINED3DSP_WRITEMASK_3)
2184     {
2185         shader_glsl_append_fixup_arg(arguments, reg_name, fixup.w_sign_fixup, fixup.w_source);
2186         if (--remaining) strcat(arguments, ", ");
2187     }
2188
2189     if (mask_size > 1)
2190         shader_addline(buffer, "%s%s = vec%u(%s);\n", reg_name, mask_str, mask_size, arguments);
2191     else
2192         shader_addline(buffer, "%s%s = %s;\n", reg_name, mask_str, arguments);
2193 }
2194
2195 static void shader_glsl_color_correction(const struct wined3d_shader_instruction *ins, struct color_fixup_desc fixup)
2196 {
2197     char reg_name[256];
2198     BOOL is_color;
2199
2200     shader_glsl_get_register_name(&ins->dst[0].reg, reg_name, &is_color, ins);
2201     shader_glsl_color_correction_ext(ins->ctx->buffer, reg_name, ins->dst[0].write_mask, fixup);
2202 }
2203
2204 static void PRINTF_ATTR(8, 9) shader_glsl_gen_sample_code(const struct wined3d_shader_instruction *ins,
2205         DWORD sampler, const struct glsl_sample_function *sample_function, DWORD swizzle,
2206         const char *dx, const char *dy, const char *bias, const char *coord_reg_fmt, ...)
2207 {
2208     const struct wined3d_shader_version *version = &ins->ctx->reg_maps->shader_version;
2209     char dst_swizzle[6];
2210     struct color_fixup_desc fixup;
2211     BOOL np2_fixup = FALSE;
2212     va_list args;
2213
2214     shader_glsl_swizzle_to_str(swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);
2215
2216     if (version->type == WINED3D_SHADER_TYPE_PIXEL)
2217     {
2218         const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
2219         fixup = priv->cur_ps_args->color_fixup[sampler];
2220
2221         if(priv->cur_ps_args->np2_fixup & (1 << sampler)) {
2222             if(bias) {
2223                 FIXME("Biased sampling from NP2 textures is unsupported\n");
2224             } else {
2225                 np2_fixup = TRUE;
2226             }
2227         }
2228     }
2229     else
2230     {
2231         fixup = COLOR_FIXUP_IDENTITY; /* FIXME: Vshader color fixup */
2232     }
2233
2234     shader_glsl_append_dst(ins->ctx->buffer, ins);
2235
2236     shader_addline(ins->ctx->buffer, "%s(%s_sampler%u, ",
2237             sample_function->name, shader_glsl_get_prefix(version->type), sampler);
2238
2239     va_start(args, coord_reg_fmt);
2240     shader_vaddline(ins->ctx->buffer, coord_reg_fmt, args);
2241     va_end(args);
2242
2243     if(bias) {
2244         shader_addline(ins->ctx->buffer, ", %s)%s);\n", bias, dst_swizzle);
2245     } else {
2246         if (np2_fixup) {
2247             const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
2248             const unsigned char idx = priv->cur_np2fixup_info->idx[sampler];
2249
2250             shader_addline(ins->ctx->buffer, " * ps_samplerNP2Fixup[%u].%s)%s);\n", idx >> 1,
2251                            (idx % 2) ? "zw" : "xy", dst_swizzle);
2252         } else if(dx && dy) {
2253             shader_addline(ins->ctx->buffer, ", %s, %s)%s);\n", dx, dy, dst_swizzle);
2254         } else {
2255             shader_addline(ins->ctx->buffer, ")%s);\n", dst_swizzle);
2256         }
2257     }
2258
2259     if(!is_identity_fixup(fixup)) {
2260         shader_glsl_color_correction(ins, fixup);
2261     }
2262 }
2263
2264 /*****************************************************************************
2265  * Begin processing individual instruction opcodes
2266  ****************************************************************************/
2267
2268 static void shader_glsl_binop(const struct wined3d_shader_instruction *ins)
2269 {
2270     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2271     struct glsl_src_param src0_param;
2272     struct glsl_src_param src1_param;
2273     DWORD write_mask;
2274     const char *op;
2275
2276     /* Determine the GLSL operator to use based on the opcode */
2277     switch (ins->handler_idx)
2278     {
2279         case WINED3DSIH_ADD:  op = "+";  break;
2280         case WINED3DSIH_AND:  op = "&";  break;
2281         case WINED3DSIH_DIV:  op = "/";  break;
2282         case WINED3DSIH_IADD: op = "+";  break;
2283         case WINED3DSIH_MUL:  op = "*";  break;
2284         case WINED3DSIH_SUB:  op = "-";  break;
2285         case WINED3DSIH_USHR: op = ">>"; break;
2286         case WINED3DSIH_XOR:  op = "^";  break;
2287         default:
2288             op = "<unhandled operator>";
2289             FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx);
2290             break;
2291     }
2292
2293     write_mask = shader_glsl_append_dst(buffer, ins);
2294     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2295     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2296     shader_addline(buffer, "%s %s %s);\n", src0_param.param_str, op, src1_param.param_str);
2297 }
2298
2299 static void shader_glsl_relop(const struct wined3d_shader_instruction *ins)
2300 {
2301     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2302     struct glsl_src_param src0_param;
2303     struct glsl_src_param src1_param;
2304     unsigned int mask_size;
2305     DWORD write_mask;
2306     const char *op;
2307
2308     write_mask = shader_glsl_append_dst(buffer, ins);
2309     mask_size = shader_glsl_get_write_mask_size(write_mask);
2310     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2311     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2312
2313     if (mask_size > 1)
2314     {
2315         switch (ins->handler_idx)
2316         {
2317             case WINED3DSIH_EQ:  op = "equal"; break;
2318             case WINED3DSIH_GE:  op = "greaterThanEqual"; break;
2319             case WINED3DSIH_IGE: op = "greaterThanEqual"; break;
2320             case WINED3DSIH_LT:  op = "lessThan"; break;
2321             default:
2322                 op = "<unhandled operator>";
2323                 ERR("Unhandled opcode %#x.\n", ins->handler_idx);
2324                 break;
2325         }
2326
2327         shader_addline(buffer, "uvec%u(%s(%s, %s)) * 0xffffffffu);\n",
2328                 mask_size, op, src0_param.param_str, src1_param.param_str);
2329     }
2330     else
2331     {
2332         switch (ins->handler_idx)
2333         {
2334             case WINED3DSIH_EQ:  op = "=="; break;
2335             case WINED3DSIH_GE:  op = ">="; break;
2336             case WINED3DSIH_IGE: op = ">="; break;
2337             case WINED3DSIH_LT:  op = "<"; break;
2338             default:
2339                 op = "<unhandled operator>";
2340                 ERR("Unhandled opcode %#x.\n", ins->handler_idx);
2341                 break;
2342         }
2343
2344         shader_addline(buffer, "%s %s %s ? 0xffffffffu : 0u);\n",
2345                 src0_param.param_str, op, src1_param.param_str);
2346     }
2347 }
2348
2349 static void shader_glsl_imul(const struct wined3d_shader_instruction *ins)
2350 {
2351     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2352     struct glsl_src_param src0_param;
2353     struct glsl_src_param src1_param;
2354     DWORD write_mask;
2355
2356     /* If we have ARB_gpu_shader5 or GLSL 4.0, we can use imulExtended(). If
2357      * not, we can emulate it. */
2358     if (ins->dst[0].reg.type != WINED3DSPR_NULL)
2359         FIXME("64-bit integer multiplies not implemented.\n");
2360
2361     if (ins->dst[1].reg.type != WINED3DSPR_NULL)
2362     {
2363         write_mask = shader_glsl_append_dst_ext(buffer, ins, &ins->dst[1]);
2364         shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2365         shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2366
2367         shader_addline(ins->ctx->buffer, "%s * %s);\n",
2368                 src0_param.param_str, src1_param.param_str);
2369     }
2370 }
2371
2372 static void shader_glsl_udiv(const struct wined3d_shader_instruction *ins)
2373 {
2374     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2375     struct glsl_src_param src0_param, src1_param;
2376     DWORD write_mask;
2377
2378     if (ins->dst[0].reg.type != WINED3DSPR_NULL)
2379     {
2380
2381         if (ins->dst[1].reg.type != WINED3DSPR_NULL)
2382         {
2383             char dst_mask[6];
2384
2385             write_mask = shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2386             shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2387             shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2388             shader_addline(buffer, "tmp0%s = %s / %s;\n",
2389                     dst_mask, src0_param.param_str, src1_param.param_str);
2390
2391             write_mask = shader_glsl_append_dst_ext(buffer, ins, &ins->dst[1]);
2392             shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2393             shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2394             shader_addline(buffer, "%s %% %s));\n", src0_param.param_str, src1_param.param_str);
2395
2396             shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0]);
2397             shader_addline(buffer, "tmp0%s);\n", dst_mask);
2398         }
2399         else
2400         {
2401             write_mask = shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0]);
2402             shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2403             shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2404             shader_addline(buffer, "%s / %s);\n", src0_param.param_str, src1_param.param_str);
2405         }
2406     }
2407     else if (ins->dst[1].reg.type != WINED3DSPR_NULL)
2408     {
2409         write_mask = shader_glsl_append_dst_ext(buffer, ins, &ins->dst[1]);
2410         shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2411         shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2412         shader_addline(buffer, "%s %% %s);\n", src0_param.param_str, src1_param.param_str);
2413     }
2414 }
2415
2416 /* Process the WINED3DSIO_MOV opcode using GLSL (dst = src) */
2417 static void shader_glsl_mov(const struct wined3d_shader_instruction *ins)
2418 {
2419     const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
2420     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2421     struct glsl_src_param src0_param;
2422     DWORD write_mask;
2423
2424     write_mask = shader_glsl_append_dst(buffer, ins);
2425     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2426
2427     /* In vs_1_1 WINED3DSIO_MOV can write to the address register. In later
2428      * shader versions WINED3DSIO_MOVA is used for this. */
2429     if (ins->ctx->reg_maps->shader_version.major == 1
2430             && ins->ctx->reg_maps->shader_version.type == WINED3D_SHADER_TYPE_VERTEX
2431             && ins->dst[0].reg.type == WINED3DSPR_ADDR)
2432     {
2433         /* This is a simple floor() */
2434         unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
2435         if (mask_size > 1) {
2436             shader_addline(buffer, "ivec%d(floor(%s)));\n", mask_size, src0_param.param_str);
2437         } else {
2438             shader_addline(buffer, "int(floor(%s)));\n", src0_param.param_str);
2439         }
2440     }
2441     else if(ins->handler_idx == WINED3DSIH_MOVA)
2442     {
2443         /* We need to *round* to the nearest int here. */
2444         unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
2445
2446         if (gl_info->supported[EXT_GPU_SHADER4])
2447         {
2448             if (mask_size > 1)
2449                 shader_addline(buffer, "ivec%d(round(%s)));\n", mask_size, src0_param.param_str);
2450             else
2451                 shader_addline(buffer, "int(round(%s)));\n", src0_param.param_str);
2452         }
2453         else
2454         {
2455             if (mask_size > 1)
2456                 shader_addline(buffer, "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s)));\n",
2457                         mask_size, src0_param.param_str, mask_size, src0_param.param_str);
2458             else
2459                 shader_addline(buffer, "int(floor(abs(%s) + 0.5) * sign(%s)));\n",
2460                         src0_param.param_str, src0_param.param_str);
2461         }
2462     }
2463     else
2464     {
2465         shader_addline(buffer, "%s);\n", src0_param.param_str);
2466     }
2467 }
2468
2469 /* Process the dot product operators DP3 and DP4 in GLSL (dst = dot(src0, src1)) */
2470 static void shader_glsl_dot(const struct wined3d_shader_instruction *ins)
2471 {
2472     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2473     struct glsl_src_param src0_param;
2474     struct glsl_src_param src1_param;
2475     DWORD dst_write_mask, src_write_mask;
2476     unsigned int dst_size = 0;
2477
2478     dst_write_mask = shader_glsl_append_dst(buffer, ins);
2479     dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
2480
2481     /* dp3 works on vec3, dp4 on vec4 */
2482     if (ins->handler_idx == WINED3DSIH_DP4)
2483     {
2484         src_write_mask = WINED3DSP_WRITEMASK_ALL;
2485     } else {
2486         src_write_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2487     }
2488
2489     shader_glsl_add_src_param(ins, &ins->src[0], src_write_mask, &src0_param);
2490     shader_glsl_add_src_param(ins, &ins->src[1], src_write_mask, &src1_param);
2491
2492     if (dst_size > 1) {
2493         shader_addline(buffer, "vec%d(dot(%s, %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
2494     } else {
2495         shader_addline(buffer, "dot(%s, %s));\n", src0_param.param_str, src1_param.param_str);
2496     }
2497 }
2498
2499 /* Note that this instruction has some restrictions. The destination write mask
2500  * can't contain the w component, and the source swizzles have to be .xyzw */
2501 static void shader_glsl_cross(const struct wined3d_shader_instruction *ins)
2502 {
2503     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2504     struct glsl_src_param src0_param;
2505     struct glsl_src_param src1_param;
2506     char dst_mask[6];
2507
2508     shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2509     shader_glsl_append_dst(ins->ctx->buffer, ins);
2510     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
2511     shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
2512     shader_addline(ins->ctx->buffer, "cross(%s, %s)%s);\n", src0_param.param_str, src1_param.param_str, dst_mask);
2513 }
2514
2515 static void shader_glsl_cut(const struct wined3d_shader_instruction *ins)
2516 {
2517     shader_addline(ins->ctx->buffer, "EndPrimitive();\n");
2518 }
2519
2520 /* Process the WINED3DSIO_POW instruction in GLSL (dst = |src0|^src1)
2521  * Src0 and src1 are scalars. Note that D3D uses the absolute of src0, while
2522  * GLSL uses the value as-is. */
2523 static void shader_glsl_pow(const struct wined3d_shader_instruction *ins)
2524 {
2525     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2526     struct glsl_src_param src0_param;
2527     struct glsl_src_param src1_param;
2528     DWORD dst_write_mask;
2529     unsigned int dst_size;
2530
2531     dst_write_mask = shader_glsl_append_dst(buffer, ins);
2532     dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
2533
2534     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2535     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
2536
2537     if (dst_size > 1)
2538     {
2539         shader_addline(buffer, "vec%u(%s == 0.0 ? 1.0 : pow(abs(%s), %s)));\n",
2540                 dst_size, src1_param.param_str, src0_param.param_str, src1_param.param_str);
2541     }
2542     else
2543     {
2544         shader_addline(buffer, "%s == 0.0 ? 1.0 : pow(abs(%s), %s));\n",
2545                 src1_param.param_str, src0_param.param_str, src1_param.param_str);
2546     }
2547 }
2548
2549 /* Process the WINED3DSIO_LOG instruction in GLSL (dst = log2(|src0|))
2550  * Src0 is a scalar. Note that D3D uses the absolute of src0, while
2551  * GLSL uses the value as-is. */
2552 static void shader_glsl_log(const struct wined3d_shader_instruction *ins)
2553 {
2554     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2555     struct glsl_src_param src0_param;
2556     DWORD dst_write_mask;
2557     unsigned int dst_size;
2558
2559     dst_write_mask = shader_glsl_append_dst(buffer, ins);
2560     dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
2561
2562     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2563
2564     if (dst_size > 1)
2565     {
2566         shader_addline(buffer, "vec%u(log2(abs(%s))));\n",
2567                 dst_size, src0_param.param_str);
2568     }
2569     else
2570     {
2571         shader_addline(buffer, "log2(abs(%s)));\n",
2572                 src0_param.param_str);
2573     }
2574 }
2575
2576 /* Map the opcode 1-to-1 to the GL code (arg->dst = instruction(src0, src1, ...) */
2577 static void shader_glsl_map2gl(const struct wined3d_shader_instruction *ins)
2578 {
2579     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2580     struct glsl_src_param src_param;
2581     const char *instruction;
2582     DWORD write_mask;
2583     unsigned i;
2584
2585     /* Determine the GLSL function to use based on the opcode */
2586     /* TODO: Possibly make this a table for faster lookups */
2587     switch (ins->handler_idx)
2588     {
2589         case WINED3DSIH_MIN: instruction = "min"; break;
2590         case WINED3DSIH_MAX: instruction = "max"; break;
2591         case WINED3DSIH_ABS: instruction = "abs"; break;
2592         case WINED3DSIH_FRC: instruction = "fract"; break;
2593         case WINED3DSIH_EXP: instruction = "exp2"; break;
2594         case WINED3DSIH_DSX: instruction = "dFdx"; break;
2595         case WINED3DSIH_DSY: instruction = "ycorrection.y * dFdy"; break;
2596         case WINED3DSIH_ROUND_NI: instruction = "floor"; break;
2597         default: instruction = "";
2598             FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx);
2599             break;
2600     }
2601
2602     write_mask = shader_glsl_append_dst(buffer, ins);
2603
2604     shader_addline(buffer, "%s(", instruction);
2605
2606     if (ins->src_count)
2607     {
2608         shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
2609         shader_addline(buffer, "%s", src_param.param_str);
2610         for (i = 1; i < ins->src_count; ++i)
2611         {
2612             shader_glsl_add_src_param(ins, &ins->src[i], write_mask, &src_param);
2613             shader_addline(buffer, ", %s", src_param.param_str);
2614         }
2615     }
2616
2617     shader_addline(buffer, "));\n");
2618 }
2619
/* Instruction handler that intentionally generates no GLSL. */
static void shader_glsl_nop(const struct wined3d_shader_instruction *ins)
{
}
2621
2622 static void shader_glsl_nrm(const struct wined3d_shader_instruction *ins)
2623 {
2624     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2625     struct glsl_src_param src_param;
2626     unsigned int mask_size;
2627     DWORD write_mask;
2628     char dst_mask[6];
2629
2630     write_mask = shader_glsl_get_write_mask(ins->dst, dst_mask);
2631     mask_size = shader_glsl_get_write_mask_size(write_mask);
2632     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
2633
2634     shader_addline(buffer, "tmp0.x = dot(%s, %s);\n",
2635             src_param.param_str, src_param.param_str);
2636     shader_glsl_append_dst(buffer, ins);
2637
2638     if (mask_size > 1)
2639     {
2640         shader_addline(buffer, "tmp0.x == 0.0 ? vec%u(0.0) : (%s * inversesqrt(tmp0.x)));\n",
2641                 mask_size, src_param.param_str);
2642     }
2643     else
2644     {
2645         shader_addline(buffer, "tmp0.x == 0.0 ? 0.0 : (%s * inversesqrt(tmp0.x)));\n",
2646                 src_param.param_str);
2647     }
2648 }
2649
2650 /** Process the WINED3DSIO_EXPP instruction in GLSL:
2651  * For shader model 1.x, do the following (and honor the writemask, so use a temporary variable):
2652  *   dst.x = 2^(floor(src))
2653  *   dst.y = src - floor(src)
2654  *   dst.z = 2^src   (partial precision is allowed, but optional)
2655  *   dst.w = 1.0;
2656  * For 2.0 shaders, just do this (honoring writemask and swizzle):
2657  *   dst = 2^src;    (partial precision is allowed, but optional)
2658  */
2659 static void shader_glsl_expp(const struct wined3d_shader_instruction *ins)
2660 {
2661     struct glsl_src_param src_param;
2662
2663     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src_param);
2664
2665     if (ins->ctx->reg_maps->shader_version.major < 2)
2666     {
2667         char dst_mask[6];
2668
2669         shader_addline(ins->ctx->buffer, "tmp0.x = exp2(floor(%s));\n", src_param.param_str);
2670         shader_addline(ins->ctx->buffer, "tmp0.y = %s - floor(%s);\n", src_param.param_str, src_param.param_str);
2671         shader_addline(ins->ctx->buffer, "tmp0.z = exp2(%s);\n", src_param.param_str);
2672         shader_addline(ins->ctx->buffer, "tmp0.w = 1.0;\n");
2673
2674         shader_glsl_append_dst(ins->ctx->buffer, ins);
2675         shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2676         shader_addline(ins->ctx->buffer, "tmp0%s);\n", dst_mask);
2677     } else {
2678         DWORD write_mask;
2679         unsigned int mask_size;
2680
2681         write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2682         mask_size = shader_glsl_get_write_mask_size(write_mask);
2683
2684         if (mask_size > 1) {
2685             shader_addline(ins->ctx->buffer, "vec%d(exp2(%s)));\n", mask_size, src_param.param_str);
2686         } else {
2687             shader_addline(ins->ctx->buffer, "exp2(%s));\n", src_param.param_str);
2688         }
2689     }
2690 }
2691
2692 static void shader_glsl_to_int(const struct wined3d_shader_instruction *ins)
2693 {
2694     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2695     struct glsl_src_param src_param;
2696     unsigned int mask_size;
2697     DWORD write_mask;
2698
2699     write_mask = shader_glsl_append_dst(buffer, ins);
2700     mask_size = shader_glsl_get_write_mask_size(write_mask);
2701     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
2702
2703     if (mask_size > 1)
2704         shader_addline(buffer, "ivec%u(%s));\n", mask_size, src_param.param_str);
2705     else
2706         shader_addline(buffer, "int(%s));\n", src_param.param_str);
2707 }
2708
2709 static void shader_glsl_to_float(const struct wined3d_shader_instruction *ins)
2710 {
2711     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2712     struct glsl_src_param src_param;
2713     unsigned int mask_size;
2714     DWORD write_mask;
2715
2716     write_mask = shader_glsl_append_dst(buffer, ins);
2717     mask_size = shader_glsl_get_write_mask_size(write_mask);
2718     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
2719
2720     if (mask_size > 1)
2721         shader_addline(buffer, "vec%u(%s));\n", mask_size, src_param.param_str);
2722     else
2723         shader_addline(buffer, "float(%s));\n", src_param.param_str);
2724 }
2725
2726 /** Process the RCP (reciprocal or inverse) opcode in GLSL (dst = 1 / src) */
2727 static void shader_glsl_rcp(const struct wined3d_shader_instruction *ins)
2728 {
2729     struct glsl_src_param src_param;
2730     DWORD write_mask;
2731     unsigned int mask_size;
2732
2733     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2734     mask_size = shader_glsl_get_write_mask_size(write_mask);
2735     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param);
2736
2737     if (mask_size > 1)
2738     {
2739         shader_addline(ins->ctx->buffer, "vec%u(1.0 / %s));\n",
2740                 mask_size, src_param.param_str);
2741     }
2742     else
2743     {
2744         shader_addline(ins->ctx->buffer, "1.0 / %s);\n",
2745                 src_param.param_str);
2746     }
2747 }
2748
2749 static void shader_glsl_rsq(const struct wined3d_shader_instruction *ins)
2750 {
2751     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2752     struct glsl_src_param src_param;
2753     DWORD write_mask;
2754     unsigned int mask_size;
2755
2756     write_mask = shader_glsl_append_dst(buffer, ins);
2757     mask_size = shader_glsl_get_write_mask_size(write_mask);
2758
2759     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param);
2760
2761     if (mask_size > 1)
2762     {
2763         shader_addline(buffer, "vec%u(inversesqrt(abs(%s))));\n",
2764                 mask_size, src_param.param_str);
2765     }
2766     else
2767     {
2768         shader_addline(buffer, "inversesqrt(abs(%s)));\n",
2769                 src_param.param_str);
2770     }
2771 }
2772
2773 /** Process signed comparison opcodes in GLSL. */
2774 static void shader_glsl_compare(const struct wined3d_shader_instruction *ins)
2775 {
2776     struct glsl_src_param src0_param;
2777     struct glsl_src_param src1_param;
2778     DWORD write_mask;
2779     unsigned int mask_size;
2780
2781     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2782     mask_size = shader_glsl_get_write_mask_size(write_mask);
2783     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2784     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2785
2786     if (mask_size > 1) {
2787         const char *compare;
2788
2789         switch(ins->handler_idx)
2790         {
2791             case WINED3DSIH_SLT: compare = "lessThan"; break;
2792             case WINED3DSIH_SGE: compare = "greaterThanEqual"; break;
2793             default: compare = "";
2794                 FIXME("Can't handle opcode %#x\n", ins->handler_idx);
2795         }
2796
2797         shader_addline(ins->ctx->buffer, "vec%d(%s(%s, %s)));\n", mask_size, compare,
2798                 src0_param.param_str, src1_param.param_str);
2799     } else {
2800         switch(ins->handler_idx)
2801         {
2802             case WINED3DSIH_SLT:
2803                 /* Step(src0, src1) is not suitable here because if src0 == src1 SLT is supposed,
2804                  * to return 0.0 but step returns 1.0 because step is not < x
2805                  * An alternative is a bvec compare padded with an unused second component.
2806                  * step(src1 * -1.0, src0 * -1.0) is not an option because it suffers from the same
2807                  * issue. Playing with not() is not possible either because not() does not accept
2808                  * a scalar.
2809                  */
2810                 shader_addline(ins->ctx->buffer, "(%s < %s) ? 1.0 : 0.0);\n",
2811                         src0_param.param_str, src1_param.param_str);
2812                 break;
2813             case WINED3DSIH_SGE:
2814                 /* Here we can use the step() function and safe a conditional */
2815                 shader_addline(ins->ctx->buffer, "step(%s, %s));\n", src1_param.param_str, src0_param.param_str);
2816                 break;
2817             default:
2818                 FIXME("Can't handle opcode %#x\n", ins->handler_idx);
2819         }
2820
2821     }
2822 }
2823
2824 static void shader_glsl_conditional_move(const struct wined3d_shader_instruction *ins)
2825 {
2826     const char *condition_prefix, *condition_suffix;
2827     struct wined3d_shader_dst_param dst;
2828     struct glsl_src_param src0_param;
2829     struct glsl_src_param src1_param;
2830     struct glsl_src_param src2_param;
2831     BOOL temp_destination = FALSE;
2832     DWORD cmp_channel = 0;
2833     unsigned int i, j;
2834     char mask_char[6];
2835     DWORD write_mask;
2836
2837     switch (ins->handler_idx)
2838     {
2839         case WINED3DSIH_CMP:
2840             condition_prefix = "";
2841             condition_suffix = " >= 0.0";
2842             break;
2843
2844         case WINED3DSIH_CND:
2845             condition_prefix = "";
2846             condition_suffix = " > 0.5";
2847             break;
2848
2849         case WINED3DSIH_MOVC:
2850             condition_prefix = "bool(";
2851             condition_suffix = ")";
2852             break;
2853
2854         default:
2855             FIXME("Unhandled instruction %#x.\n", ins->handler_idx);
2856             condition_prefix = "<unhandled prefix>";
2857             condition_suffix = "<unhandled suffix>";
2858             break;
2859     }
2860
2861     if (shader_is_scalar(&ins->dst[0].reg) || shader_is_scalar(&ins->src[0].reg))
2862     {
2863         write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2864         shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2865         shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2866         shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2867
2868         shader_addline(ins->ctx->buffer, "%s%s%s ? %s : %s);\n",
2869                 condition_prefix, src0_param.param_str, condition_suffix,
2870                 src1_param.param_str, src2_param.param_str);
2871         return;
2872     }
2873
2874     dst = ins->dst[0];
2875
2876     /* Splitting the instruction up in multiple lines imposes a problem:
2877      * The first lines may overwrite source parameters of the following lines.
2878      * Deal with that by using a temporary destination register if needed. */
2879     if ((ins->src[0].reg.idx[0].offset == dst.reg.idx[0].offset
2880                 && ins->src[0].reg.type == dst.reg.type)
2881             || (ins->src[1].reg.idx[0].offset == dst.reg.idx[0].offset
2882                 && ins->src[1].reg.type == dst.reg.type)
2883             || (ins->src[2].reg.idx[0].offset == dst.reg.idx[0].offset
2884                 && ins->src[2].reg.type == dst.reg.type))
2885         temp_destination = TRUE;
2886
2887     /* Cycle through all source0 channels. */
2888     for (i = 0; i < 4; ++i)
2889     {
2890         write_mask = 0;
2891         /* Find the destination channels which use the current source0 channel. */
2892         for (j = 0; j < 4; ++j)
2893         {
2894             if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i)
2895             {
2896                 write_mask |= WINED3DSP_WRITEMASK_0 << j;
2897                 cmp_channel = WINED3DSP_WRITEMASK_0 << j;
2898             }
2899         }
2900         dst.write_mask = ins->dst[0].write_mask & write_mask;
2901
2902         if (temp_destination)
2903         {
2904             if (!(write_mask = shader_glsl_get_write_mask(&dst, mask_char)))
2905                 continue;
2906             shader_addline(ins->ctx->buffer, "tmp0%s = (", mask_char);
2907         }
2908         else if (!(write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst)))
2909             continue;
2910
2911         shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param);
2912         shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2913         shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2914
2915         shader_addline(ins->ctx->buffer, "%s%s%s ? %s : %s);\n",
2916                 condition_prefix, src0_param.param_str, condition_suffix,
2917                 src1_param.param_str, src2_param.param_str);
2918     }
2919
2920     if (temp_destination)
2921     {
2922         shader_glsl_get_write_mask(&ins->dst[0], mask_char);
2923         shader_glsl_append_dst(ins->ctx->buffer, ins);
2924         shader_addline(ins->ctx->buffer, "tmp0%s);\n", mask_char);
2925     }
2926 }
2927
2928 /** Process the CND opcode in GLSL (dst = (src0 > 0.5) ? src1 : src2) */
2929 /* For ps 1.1-1.3, only a single component of src0 is used. For ps 1.4
2930  * the compare is done per component of src0. */
2931 static void shader_glsl_cnd(const struct wined3d_shader_instruction *ins)
2932 {
2933     struct glsl_src_param src0_param;
2934     struct glsl_src_param src1_param;
2935     struct glsl_src_param src2_param;
2936     DWORD write_mask;
2937     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
2938             ins->ctx->reg_maps->shader_version.minor);
2939
2940     if (shader_version < WINED3D_SHADER_VERSION(1, 4))
2941     {
2942         write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2943         shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2944         shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2945         shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2946
2947         /* Fun: The D3DSI_COISSUE flag changes the semantic of the cnd instruction for < 1.4 shaders */
2948         if (ins->coissue)
2949         {
2950             shader_addline(ins->ctx->buffer, "%s /* COISSUE! */);\n", src1_param.param_str);
2951         } else {
2952             shader_addline(ins->ctx->buffer, "%s > 0.5 ? %s : %s);\n",
2953                     src0_param.param_str, src1_param.param_str, src2_param.param_str);
2954         }
2955         return;
2956     }
2957
2958     shader_glsl_conditional_move(ins);
2959 }
2960
2961 /** GLSL code generation for WINED3DSIO_MAD: Multiply the first 2 opcodes, then add the last */
2962 static void shader_glsl_mad(const struct wined3d_shader_instruction *ins)
2963 {
2964     struct glsl_src_param src0_param;
2965     struct glsl_src_param src1_param;
2966     struct glsl_src_param src2_param;
2967     DWORD write_mask;
2968
2969     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2970     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2971     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2972     shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2973     shader_addline(ins->ctx->buffer, "(%s * %s) + %s);\n",
2974             src0_param.param_str, src1_param.param_str, src2_param.param_str);
2975 }
2976
2977 /* Handles transforming all WINED3DSIO_M?x? opcodes for
2978    Vertex shaders to GLSL codes */
2979 static void shader_glsl_mnxn(const struct wined3d_shader_instruction *ins)
2980 {
2981     int i;
2982     int nComponents = 0;
2983     struct wined3d_shader_dst_param tmp_dst = {{0}};
2984     struct wined3d_shader_src_param tmp_src[2] = {{{0}}};
2985     struct wined3d_shader_instruction tmp_ins;
2986
2987     memset(&tmp_ins, 0, sizeof(tmp_ins));
2988
2989     /* Set constants for the temporary argument */
2990     tmp_ins.ctx = ins->ctx;
2991     tmp_ins.dst_count = 1;
2992     tmp_ins.dst = &tmp_dst;
2993     tmp_ins.src_count = 2;
2994     tmp_ins.src = tmp_src;
2995
2996     switch(ins->handler_idx)
2997     {
2998         case WINED3DSIH_M4x4:
2999             nComponents = 4;
3000             tmp_ins.handler_idx = WINED3DSIH_DP4;
3001             break;
3002         case WINED3DSIH_M4x3:
3003             nComponents = 3;
3004             tmp_ins.handler_idx = WINED3DSIH_DP4;
3005             break;
3006         case WINED3DSIH_M3x4:
3007             nComponents = 4;
3008             tmp_ins.handler_idx = WINED3DSIH_DP3;
3009             break;
3010         case WINED3DSIH_M3x3:
3011             nComponents = 3;
3012             tmp_ins.handler_idx = WINED3DSIH_DP3;
3013             break;
3014         case WINED3DSIH_M3x2:
3015             nComponents = 2;
3016             tmp_ins.handler_idx = WINED3DSIH_DP3;
3017             break;
3018         default:
3019             break;
3020     }
3021
3022     tmp_dst = ins->dst[0];
3023     tmp_src[0] = ins->src[0];
3024     tmp_src[1] = ins->src[1];
3025     for (i = 0; i < nComponents; ++i)
3026     {
3027         tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i;
3028         shader_glsl_dot(&tmp_ins);
3029         ++tmp_src[1].reg.idx[0].offset;
3030     }
3031 }
3032
3033 /**
3034     The LRP instruction performs a component-wise linear interpolation
3035     between the second and third operands using the first operand as the
3036     blend factor.  Equation:  (dst = src2 + src0 * (src1 - src2))
3037     This is equivalent to mix(src2, src1, src0);
3038 */
3039 static void shader_glsl_lrp(const struct wined3d_shader_instruction *ins)
3040 {
3041     struct glsl_src_param src0_param;
3042     struct glsl_src_param src1_param;
3043     struct glsl_src_param src2_param;
3044     DWORD write_mask;
3045
3046     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3047
3048     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3049     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
3050     shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
3051
3052     shader_addline(ins->ctx->buffer, "mix(%s, %s, %s));\n",
3053             src2_param.param_str, src1_param.param_str, src0_param.param_str);
3054 }
3055
3056 /** Process the WINED3DSIO_LIT instruction in GLSL:
3057  * dst.x = dst.w = 1.0
3058  * dst.y = (src0.x > 0) ? src0.x
3059  * dst.z = (src0.x > 0) ? ((src0.y > 0) ? pow(src0.y, src.w) : 0) : 0
3060  *                                        where src.w is clamped at +- 128
3061  */
3062 static void shader_glsl_lit(const struct wined3d_shader_instruction *ins)
3063 {
3064     struct glsl_src_param src0_param;
3065     struct glsl_src_param src1_param;
3066     struct glsl_src_param src3_param;
3067     char dst_mask[6];
3068
3069     shader_glsl_append_dst(ins->ctx->buffer, ins);
3070     shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3071
3072     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3073     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src1_param);
3074     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src3_param);
3075
3076     /* The sdk specifies the instruction like this
3077      * dst.x = 1.0;
3078      * if(src.x > 0.0) dst.y = src.x
3079      * else dst.y = 0.0.
3080      * if(src.x > 0.0 && src.y > 0.0) dst.z = pow(src.y, power);
3081      * else dst.z = 0.0;
3082      * dst.w = 1.0;
3083      * (where power = src.w clamped between -128 and 128)
3084      *
3085      * Obviously that has quite a few conditionals in it which we don't like. So the first step is this:
3086      * dst.x = 1.0                                  ... No further explanation needed
3087      * dst.y = max(src.y, 0.0);                     ... If x < 0.0, use 0.0, otherwise x. Same as the conditional
3088      * dst.z = x > 0.0 ? pow(max(y, 0.0), p) : 0;   ... 0 ^ power is 0, and otherwise we use y anyway
3089      * dst.w = 1.0.                                 ... Nothing fancy.
3090      *
3091      * So we still have one conditional in there. So do this:
3092      * dst.z = pow(max(0.0, src.y) * step(0.0, src.x), power);
3093      *
3094      * step(0.0, x) will return 1 if src.x > 0.0, and 0 otherwise. So if y is 0 we get pow(0.0 * 1.0, power),
3095      * which sets dst.z to 0. If y > 0, but x = 0.0, we get pow(y * 0.0, power), which results in 0 too.
3096      * if both x and y are > 0, we get pow(y * 1.0, power), as it is supposed to.
3097      *
3098      * Unfortunately pow(0.0 ^ 0.0) returns NaN on most GPUs, but lit with src.y = 0 and src.w = 0 returns
3099      * a non-NaN value in dst.z. What we return doesn't matter, as long as it is not NaN. Return 0, which is
3100      * what all Windows HW drivers and GL_ARB_vertex_program's LIT do.
3101      */
3102     shader_addline(ins->ctx->buffer,
3103             "vec4(1.0, max(%s, 0.0), %s == 0.0 ? 0.0 : "
3104             "pow(max(0.0, %s) * step(0.0, %s), clamp(%s, -128.0, 128.0)), 1.0)%s);\n",
3105             src0_param.param_str, src3_param.param_str, src1_param.param_str,
3106             src0_param.param_str, src3_param.param_str, dst_mask);
3107 }
3108
3109 /** Process the WINED3DSIO_DST instruction in GLSL:
3110  * dst.x = 1.0
3111  * dst.y = src0.x * src0.y
3112  * dst.z = src0.z
3113  * dst.w = src1.w
3114  */
3115 static void shader_glsl_dst(const struct wined3d_shader_instruction *ins)
3116 {
3117     struct glsl_src_param src0y_param;
3118     struct glsl_src_param src0z_param;
3119     struct glsl_src_param src1y_param;
3120     struct glsl_src_param src1w_param;
3121     char dst_mask[6];
3122
3123     shader_glsl_append_dst(ins->ctx->buffer, ins);
3124     shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3125
3126     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src0y_param);
3127     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &src0z_param);
3128     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_1, &src1y_param);
3129     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_3, &src1w_param);
3130
3131     shader_addline(ins->ctx->buffer, "vec4(1.0, %s * %s, %s, %s))%s;\n",
3132             src0y_param.param_str, src1y_param.param_str, src0z_param.param_str, src1w_param.param_str, dst_mask);
3133 }
3134
3135 /** Process the WINED3DSIO_SINCOS instruction in GLSL:
3136  * VS 2.0 requires that specific cosine and sine constants be passed to this instruction so the hardware
3137  * can handle it.  But, these functions are built-in for GLSL, so we can just ignore the last 2 params.
3138  *
3139  * dst.x = cos(src0.?)
3140  * dst.y = sin(src0.?)
3141  * dst.z = dst.z
3142  * dst.w = dst.w
3143  */
3144 static void shader_glsl_sincos(const struct wined3d_shader_instruction *ins)
3145 {
3146     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3147     struct glsl_src_param src0_param;
3148     DWORD write_mask;
3149
3150     if (ins->ctx->reg_maps->shader_version.major < 4)
3151     {
3152         shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3153
3154         write_mask = shader_glsl_append_dst(buffer, ins);
3155         switch (write_mask)
3156         {
3157             case WINED3DSP_WRITEMASK_0:
3158                 shader_addline(buffer, "cos(%s));\n", src0_param.param_str);
3159                 break;
3160
3161             case WINED3DSP_WRITEMASK_1:
3162                 shader_addline(buffer, "sin(%s));\n", src0_param.param_str);
3163                 break;
3164
3165             case (WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1):
3166                 shader_addline(buffer, "vec2(cos(%s), sin(%s)));\n",
3167                         src0_param.param_str, src0_param.param_str);
3168                 break;
3169
3170             default:
3171                 ERR("Write mask should be .x, .y or .xy\n");
3172                 break;
3173         }
3174
3175         return;
3176     }
3177
3178     if (ins->dst[0].reg.type != WINED3DSPR_NULL)
3179     {
3180
3181         if (ins->dst[1].reg.type != WINED3DSPR_NULL)
3182         {
3183             char dst_mask[6];
3184
3185             write_mask = shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3186             shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3187             shader_addline(buffer, "tmp0%s = sin(%s);\n", dst_mask, src0_param.param_str);
3188
3189             write_mask = shader_glsl_append_dst_ext(buffer, ins, &ins->dst[1]);
3190             shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3191             shader_addline(buffer, "cos(%s));\n", src0_param.param_str);
3192
3193             shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0]);
3194             shader_addline(buffer, "tmp0%s);\n", dst_mask);
3195         }
3196         else
3197         {
3198             write_mask = shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0]);
3199             shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3200             shader_addline(buffer, "sin(%s));\n", src0_param.param_str);
3201         }
3202     }
3203     else if (ins->dst[1].reg.type != WINED3DSPR_NULL)
3204     {
3205         write_mask = shader_glsl_append_dst_ext(buffer, ins, &ins->dst[1]);
3206         shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3207         shader_addline(buffer, "cos(%s));\n", src0_param.param_str);
3208     }
3209 }
3210
3211 /* sgn in vs_2_0 has 2 extra parameters(registers for temporary storage) which we don't use
3212  * here. But those extra parameters require a dedicated function for sgn, since map2gl would
3213  * generate invalid code
3214  */
3215 static void shader_glsl_sgn(const struct wined3d_shader_instruction *ins)
3216 {
3217     struct glsl_src_param src0_param;
3218     DWORD write_mask;
3219
3220     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3221     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3222
3223     shader_addline(ins->ctx->buffer, "sign(%s));\n", src0_param.param_str);
3224 }
3225
3226 /** Process the WINED3DSIO_LOOP instruction in GLSL:
3227  * Start a for() loop where src1.y is the initial value of aL,
3228  *  increment aL by src1.z for a total of src1.x iterations.
3229  *  Need to use a temporary variable for this operation.
3230  */
3231 /* FIXME: I don't think nested loops will work correctly this way. */
3232 static void shader_glsl_loop(const struct wined3d_shader_instruction *ins)
3233 {
3234     struct wined3d_shader_loop_state *loop_state = ins->ctx->loop_state;
3235     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3236     const struct wined3d_shader *shader = ins->ctx->shader;
3237     const struct wined3d_shader_lconst *constant;
3238     struct glsl_src_param src1_param;
3239     const DWORD *control_values = NULL;
3240
3241     if (ins->ctx->reg_maps->shader_version.major < 4)
3242     {
3243         shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_ALL, &src1_param);
3244
3245         /* Try to hardcode the loop control parameters if possible. Direct3D 9
3246          * class hardware doesn't support real varying indexing, but Microsoft
3247          * designed this feature for Shader model 2.x+. If the loop control is
3248          * known at compile time, the GLSL compiler can unroll the loop, and
3249          * replace indirect addressing with direct addressing. */
3250         if (ins->src[1].reg.type == WINED3DSPR_CONSTINT)
3251         {
3252             LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry)
3253             {
3254                 if (constant->idx == ins->src[1].reg.idx[0].offset)
3255                 {
3256                     control_values = constant->value;
3257                     break;
3258                 }
3259             }
3260         }
3261
3262         if (control_values)
3263         {
3264             struct wined3d_shader_loop_control loop_control;
3265             loop_control.count = control_values[0];
3266             loop_control.start = control_values[1];
3267             loop_control.step = (int)control_values[2];
3268
3269             if (loop_control.step > 0)
3270             {
3271                 shader_addline(buffer, "for (aL%u = %u; aL%u < (%u * %d + %u); aL%u += %d)\n{\n",
3272                         loop_state->current_depth, loop_control.start,
3273                         loop_state->current_depth, loop_control.count, loop_control.step, loop_control.start,
3274                         loop_state->current_depth, loop_control.step);
3275             }
3276             else if (loop_control.step < 0)
3277             {
3278                 shader_addline(buffer, "for (aL%u = %u; aL%u > (%u * %d + %u); aL%u += %d)\n{\n",
3279                         loop_state->current_depth, loop_control.start,
3280                         loop_state->current_depth, loop_control.count, loop_control.step, loop_control.start,
3281                         loop_state->current_depth, loop_control.step);
3282             }
3283             else
3284             {
3285                 shader_addline(buffer, "for (aL%u = %u, tmpInt%u = 0; tmpInt%u < %u; tmpInt%u++)\n{\n",
3286                         loop_state->current_depth, loop_control.start, loop_state->current_depth,
3287                         loop_state->current_depth, loop_control.count,
3288                         loop_state->current_depth);
3289             }
3290         }
3291         else
3292         {
3293             shader_addline(buffer, "for (tmpInt%u = 0, aL%u = %s.y; tmpInt%u < %s.x; tmpInt%u++, aL%u += %s.z)\n{\n",
3294                     loop_state->current_depth, loop_state->current_reg,
3295                     src1_param.reg_name, loop_state->current_depth, src1_param.reg_name,
3296                     loop_state->current_depth, loop_state->current_reg, src1_param.reg_name);
3297         }
3298
3299         ++loop_state->current_reg;
3300     }
3301     else
3302     {
3303         shader_addline(buffer, "for (;;)\n{\n");
3304     }
3305
3306     ++loop_state->current_depth;
3307 }
3308
3309 static void shader_glsl_end(const struct wined3d_shader_instruction *ins)
3310 {
3311     struct wined3d_shader_loop_state *loop_state = ins->ctx->loop_state;
3312
3313     shader_addline(ins->ctx->buffer, "}\n");
3314
3315     if (ins->handler_idx == WINED3DSIH_ENDLOOP)
3316     {
3317         --loop_state->current_depth;
3318         --loop_state->current_reg;
3319     }
3320
3321     if (ins->handler_idx == WINED3DSIH_ENDREP)
3322     {
3323         --loop_state->current_depth;
3324     }
3325 }
3326
3327 static void shader_glsl_rep(const struct wined3d_shader_instruction *ins)
3328 {
3329     const struct wined3d_shader *shader = ins->ctx->shader;
3330     struct wined3d_shader_loop_state *loop_state = ins->ctx->loop_state;
3331     const struct wined3d_shader_lconst *constant;
3332     struct glsl_src_param src0_param;
3333     const DWORD *control_values = NULL;
3334
3335     /* Try to hardcode local values to help the GLSL compiler to unroll and optimize the loop */
3336     if (ins->src[0].reg.type == WINED3DSPR_CONSTINT)
3337     {
3338         LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry)
3339         {
3340             if (constant->idx == ins->src[0].reg.idx[0].offset)
3341             {
3342                 control_values = constant->value;
3343                 break;
3344             }
3345         }
3346     }
3347
3348     if (control_values)
3349     {
3350         shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %d; tmpInt%d++) {\n",
3351                 loop_state->current_depth, loop_state->current_depth,
3352                 control_values[0], loop_state->current_depth);
3353     }
3354     else
3355     {
3356         shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3357         shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %s; tmpInt%d++) {\n",
3358                 loop_state->current_depth, loop_state->current_depth,
3359                 src0_param.param_str, loop_state->current_depth);
3360     }
3361
3362     ++loop_state->current_depth;
3363 }
3364
3365 static void shader_glsl_if(const struct wined3d_shader_instruction *ins)
3366 {
3367     struct glsl_src_param src0_param;
3368
3369     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3370     shader_addline(ins->ctx->buffer, "if (%s) {\n", src0_param.param_str);
3371 }
3372
3373 static void shader_glsl_ifc(const struct wined3d_shader_instruction *ins)
3374 {
3375     struct glsl_src_param src0_param;
3376     struct glsl_src_param src1_param;
3377
3378     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3379     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3380
3381     shader_addline(ins->ctx->buffer, "if (%s %s %s) {\n",
3382             src0_param.param_str, shader_glsl_get_rel_op(ins->flags), src1_param.param_str);
3383 }
3384
3385 static void shader_glsl_else(const struct wined3d_shader_instruction *ins)
3386 {
3387     shader_addline(ins->ctx->buffer, "} else {\n");
3388 }
3389
3390 static void shader_glsl_emit(const struct wined3d_shader_instruction *ins)
3391 {
3392     shader_addline(ins->ctx->buffer, "EmitVertex();\n");
3393 }
3394
3395 static void shader_glsl_break(const struct wined3d_shader_instruction *ins)
3396 {
3397     shader_addline(ins->ctx->buffer, "break;\n");
3398 }
3399
3400 /* FIXME: According to MSDN the compare is done per component. */
3401 static void shader_glsl_breakc(const struct wined3d_shader_instruction *ins)
3402 {
3403     struct glsl_src_param src0_param;
3404     struct glsl_src_param src1_param;
3405
3406     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3407     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3408
3409     shader_addline(ins->ctx->buffer, "if (%s %s %s) break;\n",
3410             src0_param.param_str, shader_glsl_get_rel_op(ins->flags), src1_param.param_str);
3411 }
3412
3413 static void shader_glsl_breakp(const struct wined3d_shader_instruction *ins)
3414 {
3415     struct glsl_src_param src_param;
3416
3417     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src_param);
3418     shader_addline(ins->ctx->buffer, "if (bool(%s)) break;\n", src_param.param_str);
3419 }
3420
3421 static void shader_glsl_label(const struct wined3d_shader_instruction *ins)
3422 {
3423     shader_addline(ins->ctx->buffer, "}\n");
3424     shader_addline(ins->ctx->buffer, "void subroutine%u()\n{\n", ins->src[0].reg.idx[0].offset);
3425 }
3426
3427 static void shader_glsl_call(const struct wined3d_shader_instruction *ins)
3428 {
3429     shader_addline(ins->ctx->buffer, "subroutine%u();\n", ins->src[0].reg.idx[0].offset);
3430 }
3431
3432 static void shader_glsl_callnz(const struct wined3d_shader_instruction *ins)
3433 {
3434     struct glsl_src_param src1_param;
3435
3436     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3437     shader_addline(ins->ctx->buffer, "if (%s) subroutine%u();\n",
3438             src1_param.param_str, ins->src[0].reg.idx[0].offset);
3439 }
3440
/* Handler for the ret instruction; intentionally writes nothing. */
static void shader_glsl_ret(const struct wined3d_shader_instruction *ins)
{
    /* No-op. The closing } is written when a new function is started (see
     * shader_glsl_label()), and at the end of the shader. This handler only
     * exists to suppress the unhandled instruction warning. */
}
3447
3448 /*********************************************
3449  * Pixel Shader Specific Code begins here
3450  ********************************************/
3451 static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
3452 {
3453     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
3454             ins->ctx->reg_maps->shader_version.minor);
3455     struct glsl_sample_function sample_function;
3456     DWORD sample_flags = 0;
3457     DWORD sampler_idx;
3458     DWORD mask = 0, swizzle;
3459     const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3460
3461     /* 1.0-1.4: Use destination register as sampler source.
3462      * 2.0+: Use provided sampler source. */
3463     if (shader_version < WINED3D_SHADER_VERSION(2,0))
3464         sampler_idx = ins->dst[0].reg.idx[0].offset;
3465     else
3466         sampler_idx = ins->src[1].reg.idx[0].offset;
3467
3468     if (shader_version < WINED3D_SHADER_VERSION(1,4))
3469     {
3470         DWORD flags = (priv->cur_ps_args->tex_transform >> sampler_idx * WINED3D_PSARGS_TEXTRANSFORM_SHIFT)
3471                 & WINED3D_PSARGS_TEXTRANSFORM_MASK;
3472         enum wined3d_sampler_texture_type sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3473
3474         /* Projected cube textures don't make a lot of sense, the resulting coordinates stay the same. */
3475         if (flags & WINED3D_PSARGS_PROJECTED && sampler_type != WINED3DSTT_CUBE)
3476         {
3477             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3478             switch (flags & ~WINED3D_PSARGS_PROJECTED)
3479             {
3480                 case WINED3D_TTFF_COUNT1:
3481                     FIXME("WINED3D_TTFF_PROJECTED with WINED3D_TTFF_COUNT1?\n");
3482                     break;
3483                 case WINED3D_TTFF_COUNT2:
3484                     mask = WINED3DSP_WRITEMASK_1;
3485                     break;
3486                 case WINED3D_TTFF_COUNT3:
3487                     mask = WINED3DSP_WRITEMASK_2;
3488                     break;
3489                 case WINED3D_TTFF_COUNT4:
3490                 case WINED3D_TTFF_DISABLE:
3491                     mask = WINED3DSP_WRITEMASK_3;
3492                     break;
3493             }
3494         }
3495     }
3496     else if (shader_version < WINED3D_SHADER_VERSION(2,0))
3497     {
3498         enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers;
3499
3500         if (src_mod == WINED3DSPSM_DZ) {
3501             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3502             mask = WINED3DSP_WRITEMASK_2;
3503         } else if (src_mod == WINED3DSPSM_DW) {
3504             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3505             mask = WINED3DSP_WRITEMASK_3;
3506         }
3507     } else {
3508         if (ins->flags & WINED3DSI_TEXLD_PROJECT)
3509         {
3510             /* ps 2.0 texldp instruction always divides by the fourth component. */
3511             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3512             mask = WINED3DSP_WRITEMASK_3;
3513         }
3514     }
3515
3516     if (priv->cur_ps_args->np2_fixup & (1 << sampler_idx))
3517         sample_flags |= WINED3D_GLSL_SAMPLE_NPOT;
3518
3519     shader_glsl_get_sample_function(ins->ctx, sampler_idx, sample_flags, &sample_function);
3520     mask |= sample_function.coord_mask;
3521
3522     if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE;
3523     else swizzle = ins->src[1].swizzle;
3524
3525     /* 1.0-1.3: Use destination register as coordinate source.
3526        1.4+: Use provided coordinate source register. */
3527     if (shader_version < WINED3D_SHADER_VERSION(1,4))
3528     {
3529         char coord_mask[6];
3530         shader_glsl_write_mask_to_str(mask, coord_mask);
3531         shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL,
3532                 "T%u%s", sampler_idx, coord_mask);
3533     }
3534     else
3535     {
3536         struct glsl_src_param coord_param;
3537         shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param);
3538         if (ins->flags & WINED3DSI_TEXLD_BIAS)
3539         {
3540             struct glsl_src_param bias;
3541             shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias);
3542             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, bias.param_str,
3543                     "%s", coord_param.param_str);
3544         } else {
3545             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL,
3546                     "%s", coord_param.param_str);
3547         }
3548     }
3549 }
3550
3551 static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins)
3552 {
3553     const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3554     struct glsl_src_param coord_param, dx_param, dy_param;
3555     DWORD sample_flags = WINED3D_GLSL_SAMPLE_GRAD;
3556     struct glsl_sample_function sample_function;
3557     DWORD sampler_idx;
3558     DWORD swizzle = ins->src[1].swizzle;
3559     const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3560
3561     if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD] && !gl_info->supported[EXT_GPU_SHADER4])
3562     {
3563         FIXME("texldd used, but not supported by hardware. Falling back to regular tex\n");
3564         shader_glsl_tex(ins);
3565         return;
3566     }
3567
3568     sampler_idx = ins->src[1].reg.idx[0].offset;
3569     if (priv->cur_ps_args->np2_fixup & (1 << sampler_idx))
3570         sample_flags |= WINED3D_GLSL_SAMPLE_NPOT;
3571
3572     shader_glsl_get_sample_function(ins->ctx, sampler_idx, sample_flags, &sample_function);
3573     shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
3574     shader_glsl_add_src_param(ins, &ins->src[2], sample_function.coord_mask, &dx_param);
3575     shader_glsl_add_src_param(ins, &ins->src[3], sample_function.coord_mask, &dy_param);
3576
3577     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, dx_param.param_str, dy_param.param_str, NULL,
3578                                 "%s", coord_param.param_str);
3579 }
3580
3581 static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins)
3582 {
3583     const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3584     struct glsl_src_param coord_param, lod_param;
3585     DWORD sample_flags = WINED3D_GLSL_SAMPLE_LOD;
3586     struct glsl_sample_function sample_function;
3587     DWORD sampler_idx;
3588     DWORD swizzle = ins->src[1].swizzle;
3589     const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3590
3591     sampler_idx = ins->src[1].reg.idx[0].offset;
3592     if (ins->ctx->reg_maps->shader_version.type == WINED3D_SHADER_TYPE_PIXEL
3593             && priv->cur_ps_args->np2_fixup & (1 << sampler_idx))
3594         sample_flags |= WINED3D_GLSL_SAMPLE_NPOT;
3595
3596     shader_glsl_get_sample_function(ins->ctx, sampler_idx, sample_flags, &sample_function);
3597     shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
3598
3599     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param);
3600
3601     if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD] && !gl_info->supported[EXT_GPU_SHADER4]
3602             && ins->ctx->reg_maps->shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
3603     {
3604         /* Plain GLSL only supports Lod sampling functions in vertex shaders.
3605          * However, the NVIDIA drivers allow them in fragment shaders as well,
3606          * even without the appropriate extension. */
3607         WARN("Using %s in fragment shader.\n", sample_function.name);
3608     }
3609     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str,
3610             "%s", coord_param.param_str);
3611 }
3612
3613 static void shader_glsl_texcoord(const struct wined3d_shader_instruction *ins)
3614 {
3615     /* FIXME: Make this work for more than just 2D textures */
3616     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3617     DWORD write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3618
3619     if (!(ins->ctx->reg_maps->shader_version.major == 1 && ins->ctx->reg_maps->shader_version.minor == 4))
3620     {
3621         char dst_mask[6];
3622
3623         shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3624         shader_addline(buffer, "clamp(gl_TexCoord[%u], 0.0, 1.0)%s);\n",
3625                 ins->dst[0].reg.idx[0].offset, dst_mask);
3626     }
3627     else
3628     {
3629         enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers;
3630         DWORD reg = ins->src[0].reg.idx[0].offset;
3631         char dst_swizzle[6];
3632
3633         shader_glsl_get_swizzle(&ins->src[0], FALSE, write_mask, dst_swizzle);
3634
3635         if (src_mod == WINED3DSPSM_DZ)
3636         {
3637             unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
3638             struct glsl_src_param div_param;
3639
3640             shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &div_param);
3641
3642             if (mask_size > 1) {
3643                 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
3644             } else {
3645                 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
3646             }
3647         }
3648         else if (src_mod == WINED3DSPSM_DW)
3649         {
3650             unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
3651             struct glsl_src_param div_param;
3652
3653             shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &div_param);
3654
3655             if (mask_size > 1) {
3656                 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
3657             } else {
3658                 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
3659             }
3660         } else {
3661             shader_addline(buffer, "gl_TexCoord[%u]%s);\n", reg, dst_swizzle);
3662         }
3663     }
3664 }
3665
3666 /** Process the WINED3DSIO_TEXDP3TEX instruction in GLSL:
3667  * Take a 3-component dot product of the TexCoord[dstreg] and src,
3668  * then perform a 1D texture lookup from stage dstregnum, place into dst. */
3669 static void shader_glsl_texdp3tex(const struct wined3d_shader_instruction *ins)
3670 {
3671     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3672     DWORD sampler_idx = ins->dst[0].reg.idx[0].offset;
3673     struct glsl_sample_function sample_function;
3674     struct glsl_src_param src0_param;
3675     UINT mask_size;
3676
3677     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3678
3679     /* Do I have to take care about the projected bit? I don't think so, since the dp3 returns only one
3680      * scalar, and projected sampling would require 4.
3681      *
3682      * It is a dependent read - not valid with conditional NP2 textures
3683      */
3684     shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function);
3685     mask_size = shader_glsl_get_write_mask_size(sample_function.coord_mask);
3686
3687     switch(mask_size)
3688     {
3689         case 1:
3690             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3691                     "dot(gl_TexCoord[%u].xyz, %s)", sampler_idx, src0_param.param_str);
3692             break;
3693
3694         case 2:
3695             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3696                     "vec2(dot(gl_TexCoord[%u].xyz, %s), 0.0)", sampler_idx, src0_param.param_str);
3697             break;
3698
3699         case 3:
3700             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3701                     "vec3(dot(gl_TexCoord[%u].xyz, %s), 0.0, 0.0)", sampler_idx, src0_param.param_str);
3702             break;
3703
3704         default:
3705             FIXME("Unexpected mask size %u\n", mask_size);
3706             break;
3707     }
3708 }
3709
3710 /** Process the WINED3DSIO_TEXDP3 instruction in GLSL:
3711  * Take a 3-component dot product of the TexCoord[dstreg] and src. */
3712 static void shader_glsl_texdp3(const struct wined3d_shader_instruction *ins)
3713 {
3714     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3715     DWORD dstreg = ins->dst[0].reg.idx[0].offset;
3716     struct glsl_src_param src0_param;
3717     DWORD dst_mask;
3718     unsigned int mask_size;
3719
3720     dst_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3721     mask_size = shader_glsl_get_write_mask_size(dst_mask);
3722     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3723
3724     if (mask_size > 1) {
3725         shader_addline(ins->ctx->buffer, "vec%d(dot(T%u.xyz, %s)));\n", mask_size, dstreg, src0_param.param_str);
3726     } else {
3727         shader_addline(ins->ctx->buffer, "dot(T%u.xyz, %s));\n", dstreg, src0_param.param_str);
3728     }
3729 }
3730
3731 /** Process the WINED3DSIO_TEXDEPTH instruction in GLSL:
3732  * Calculate the depth as dst.x / dst.y   */
3733 static void shader_glsl_texdepth(const struct wined3d_shader_instruction *ins)
3734 {
3735     struct glsl_dst_param dst_param;
3736
3737     shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3738
3739     /* Tests show that texdepth never returns anything below 0.0, and that r5.y is clamped to 1.0.
3740      * Negative input is accepted, -0.25 / -0.5 returns 0.5. GL should clamp gl_FragDepth to [0;1], but
3741      * this doesn't always work, so clamp the results manually. Whether or not the x value is clamped at 1
3742      * too is irrelevant, since if x = 0, any y value < 1.0 (and > 1.0 is not allowed) results in a result
3743      * >= 1.0 or < 0.0
3744      */
3745     shader_addline(ins->ctx->buffer, "gl_FragDepth = clamp((%s.x / min(%s.y, 1.0)), 0.0, 1.0);\n",
3746             dst_param.reg_name, dst_param.reg_name);
3747 }
3748
3749 /** Process the WINED3DSIO_TEXM3X2DEPTH instruction in GLSL:
3750  * Last row of a 3x2 matrix multiply, use the result to calculate the depth:
3751  * Calculate tmp0.y = TexCoord[dstreg] . src.xyz;  (tmp0.x has already been calculated)
3752  * depth = (tmp0.y == 0.0) ? 1.0 : tmp0.x / tmp0.y
3753  */
3754 static void shader_glsl_texm3x2depth(const struct wined3d_shader_instruction *ins)
3755 {
3756     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3757     DWORD dstreg = ins->dst[0].reg.idx[0].offset;
3758     struct glsl_src_param src0_param;
3759
3760     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3761
3762     shader_addline(ins->ctx->buffer, "tmp0.y = dot(T%u.xyz, %s);\n", dstreg, src0_param.param_str);
3763     shader_addline(ins->ctx->buffer, "gl_FragDepth = (tmp0.y == 0.0) ? 1.0 : clamp(tmp0.x / tmp0.y, 0.0, 1.0);\n");
3764 }
3765
3766 /** Process the WINED3DSIO_TEXM3X2PAD instruction in GLSL
3767  * Calculate the 1st of a 2-row matrix multiplication. */
3768 static void shader_glsl_texm3x2pad(const struct wined3d_shader_instruction *ins)
3769 {
3770     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3771     DWORD reg = ins->dst[0].reg.idx[0].offset;
3772     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3773     struct glsl_src_param src0_param;
3774
3775     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3776     shader_addline(buffer, "tmp0.x = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3777 }
3778
3779 /** Process the WINED3DSIO_TEXM3X3PAD instruction in GLSL
3780  * Calculate the 1st or 2nd row of a 3-row matrix multiplication. */
3781 static void shader_glsl_texm3x3pad(const struct wined3d_shader_instruction *ins)
3782 {
3783     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3784     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3785     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
3786     DWORD reg = ins->dst[0].reg.idx[0].offset;
3787     struct glsl_src_param src0_param;
3788
3789     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3790     shader_addline(buffer, "tmp0.%c = dot(T%u.xyz, %s);\n", 'x' + tex_mx->current_row, reg, src0_param.param_str);
3791     tex_mx->texcoord_w[tex_mx->current_row++] = reg;
3792 }
3793
3794 static void shader_glsl_texm3x2tex(const struct wined3d_shader_instruction *ins)
3795 {
3796     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3797     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3798     struct glsl_sample_function sample_function;
3799     DWORD reg = ins->dst[0].reg.idx[0].offset;
3800     struct glsl_src_param src0_param;
3801
3802     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3803     shader_addline(buffer, "tmp0.y = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3804
3805     shader_glsl_get_sample_function(ins->ctx, reg, 0, &sample_function);
3806
3807     /* Sample the texture using the calculated coordinates */
3808     shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xy");
3809 }
3810
3811 /** Process the WINED3DSIO_TEXM3X3TEX instruction in GLSL
3812  * Perform the 3rd row of a 3x3 matrix multiply, then sample the texture using the calculated coordinates */
3813 static void shader_glsl_texm3x3tex(const struct wined3d_shader_instruction *ins)
3814 {
3815     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3816     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
3817     struct glsl_sample_function sample_function;
3818     DWORD reg = ins->dst[0].reg.idx[0].offset;
3819     struct glsl_src_param src0_param;
3820
3821     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3822     shader_addline(ins->ctx->buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3823
3824     /* Dependent read, not valid with conditional NP2 */
3825     shader_glsl_get_sample_function(ins->ctx, reg, 0, &sample_function);
3826
3827     /* Sample the texture using the calculated coordinates */
3828     shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3829
3830     tex_mx->current_row = 0;
3831 }
3832
3833 /** Process the WINED3DSIO_TEXM3X3 instruction in GLSL
3834  * Perform the 3rd row of a 3x3 matrix multiply */
3835 static void shader_glsl_texm3x3(const struct wined3d_shader_instruction *ins)
3836 {
3837     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3838     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
3839     DWORD reg = ins->dst[0].reg.idx[0].offset;
3840     struct glsl_src_param src0_param;
3841     char dst_mask[6];
3842
3843     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3844
3845     shader_glsl_append_dst(ins->ctx->buffer, ins);
3846     shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3847     shader_addline(ins->ctx->buffer, "vec4(tmp0.xy, dot(T%u.xyz, %s), 1.0)%s);\n", reg, src0_param.param_str, dst_mask);
3848
3849     tex_mx->current_row = 0;
3850 }
3851
3852 /* Process the WINED3DSIO_TEXM3X3SPEC instruction in GLSL
3853  * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
3854 static void shader_glsl_texm3x3spec(const struct wined3d_shader_instruction *ins)
3855 {
3856     struct glsl_src_param src0_param;
3857     struct glsl_src_param src1_param;
3858     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3859     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
3860     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3861     struct glsl_sample_function sample_function;
3862     DWORD reg = ins->dst[0].reg.idx[0].offset;
3863     char coord_mask[6];
3864
3865     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3866     shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
3867
3868     /* Perform the last matrix multiply operation */
3869     shader_addline(buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3870     /* Reflection calculation */
3871     shader_addline(buffer, "tmp0.xyz = -reflect((%s), normalize(tmp0.xyz));\n", src1_param.param_str);
3872
3873     /* Dependent read, not valid with conditional NP2 */
3874     shader_glsl_get_sample_function(ins->ctx, reg, 0, &sample_function);
3875     shader_glsl_write_mask_to_str(sample_function.coord_mask, coord_mask);
3876
3877     /* Sample the texture */
3878     shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE,
3879             NULL, NULL, NULL, "tmp0%s", coord_mask);
3880
3881     tex_mx->current_row = 0;
3882 }
3883
3884 /* Process the WINED3DSIO_TEXM3X3VSPEC instruction in GLSL
3885  * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
3886 static void shader_glsl_texm3x3vspec(const struct wined3d_shader_instruction *ins)
3887 {
3888     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3889     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
3890     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3891     struct glsl_sample_function sample_function;
3892     DWORD reg = ins->dst[0].reg.idx[0].offset;
3893     struct glsl_src_param src0_param;
3894     char coord_mask[6];
3895
3896     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3897
3898     /* Perform the last matrix multiply operation */
3899     shader_addline(buffer, "tmp0.z = dot(vec3(T%u), vec3(%s));\n", reg, src0_param.param_str);
3900
3901     /* Construct the eye-ray vector from w coordinates */
3902     shader_addline(buffer, "tmp1.xyz = normalize(vec3(gl_TexCoord[%u].w, gl_TexCoord[%u].w, gl_TexCoord[%u].w));\n",
3903             tex_mx->texcoord_w[0], tex_mx->texcoord_w[1], reg);
3904     shader_addline(buffer, "tmp0.xyz = -reflect(tmp1.xyz, normalize(tmp0.xyz));\n");
3905
3906     /* Dependent read, not valid with conditional NP2 */
3907     shader_glsl_get_sample_function(ins->ctx, reg, 0, &sample_function);
3908     shader_glsl_write_mask_to_str(sample_function.coord_mask, coord_mask);
3909
3910     /* Sample the texture using the calculated coordinates */
3911     shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE,
3912             NULL, NULL, NULL, "tmp0%s", coord_mask);
3913
3914     tex_mx->current_row = 0;
3915 }
3916
3917 /** Process the WINED3DSIO_TEXBEM instruction in GLSL.
3918  * Apply a fake bump map transform.
3919  * texbem is pshader <= 1.3 only, this saves a few version checks
3920  */
3921 static void shader_glsl_texbem(const struct wined3d_shader_instruction *ins)
3922 {
3923     const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3924     struct glsl_sample_function sample_function;
3925     struct glsl_src_param coord_param;
3926     DWORD sampler_idx;
3927     DWORD mask;
3928     DWORD flags;
3929     char coord_mask[6];
3930
3931     sampler_idx = ins->dst[0].reg.idx[0].offset;
3932     flags = (priv->cur_ps_args->tex_transform >> sampler_idx * WINED3D_PSARGS_TEXTRANSFORM_SHIFT)
3933             & WINED3D_PSARGS_TEXTRANSFORM_MASK;
3934
3935     /* Dependent read, not valid with conditional NP2 */
3936     shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function);
3937     mask = sample_function.coord_mask;
3938
3939     shader_glsl_write_mask_to_str(mask, coord_mask);
3940
3941     /* With projected textures, texbem only divides the static texture coord,
3942      * not the displacement, so we can't let GL handle this. */
3943     if (flags & WINED3D_PSARGS_PROJECTED)
3944     {
3945         DWORD div_mask=0;
3946         char coord_div_mask[3];
3947         switch (flags & ~WINED3D_PSARGS_PROJECTED)
3948         {
3949             case WINED3D_TTFF_COUNT1:
3950                 FIXME("WINED3D_TTFF_PROJECTED with WINED3D_TTFF_COUNT1?\n");
3951                 break;
3952             case WINED3D_TTFF_COUNT2:
3953                 div_mask = WINED3DSP_WRITEMASK_1;
3954                 break;
3955             case WINED3D_TTFF_COUNT3:
3956                 div_mask = WINED3DSP_WRITEMASK_2;
3957                 break;
3958             case WINED3D_TTFF_COUNT4:
3959             case WINED3D_TTFF_DISABLE:
3960                 div_mask = WINED3DSP_WRITEMASK_3;
3961                 break;
3962         }
3963         shader_glsl_write_mask_to_str(div_mask, coord_div_mask);
3964         shader_addline(ins->ctx->buffer, "T%u%s /= T%u%s;\n", sampler_idx, coord_mask, sampler_idx, coord_div_mask);
3965     }
3966
3967     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &coord_param);
3968
3969     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3970             "T%u%s + vec4(bumpenv_mat%u * %s, 0.0, 0.0)%s", sampler_idx, coord_mask, sampler_idx,
3971             coord_param.param_str, coord_mask);
3972
3973     if (ins->handler_idx == WINED3DSIH_TEXBEML)
3974     {
3975         struct glsl_src_param luminance_param;
3976         struct glsl_dst_param dst_param;
3977
3978         shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &luminance_param);
3979         shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3980
3981         shader_addline(ins->ctx->buffer, "%s%s *= (%s * bumpenv_lum_scale%u + bumpenv_lum_offset%u);\n",
3982                 dst_param.reg_name, dst_param.mask_str,
3983                 luminance_param.param_str, sampler_idx, sampler_idx);
3984     }
3985 }
3986
3987 static void shader_glsl_bem(const struct wined3d_shader_instruction *ins)
3988 {
3989     DWORD sampler_idx = ins->dst[0].reg.idx[0].offset;
3990     struct glsl_src_param src0_param, src1_param;
3991
3992     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
3993     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
3994
3995     shader_glsl_append_dst(ins->ctx->buffer, ins);
3996     shader_addline(ins->ctx->buffer, "%s + bumpenv_mat%u * %s);\n",
3997             src0_param.param_str, sampler_idx, src1_param.param_str);
3998 }
3999
4000 /** Process the WINED3DSIO_TEXREG2AR instruction in GLSL
4001  * Sample 2D texture at dst using the alpha & red (wx) components of src as texture coordinates */
4002 static void shader_glsl_texreg2ar(const struct wined3d_shader_instruction *ins)
4003 {
4004     DWORD sampler_idx = ins->dst[0].reg.idx[0].offset;
4005     struct glsl_sample_function sample_function;
4006     struct glsl_src_param src0_param;
4007
4008     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
4009
4010     shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function);
4011     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
4012             "%s.wx", src0_param.reg_name);
4013 }
4014
4015 /** Process the WINED3DSIO_TEXREG2GB instruction in GLSL
4016  * Sample 2D texture at dst using the green & blue (yz) components of src as texture coordinates */
4017 static void shader_glsl_texreg2gb(const struct wined3d_shader_instruction *ins)
4018 {
4019     DWORD sampler_idx = ins->dst[0].reg.idx[0].offset;
4020     struct glsl_sample_function sample_function;
4021     struct glsl_src_param src0_param;
4022
4023     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
4024
4025     shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function);
4026     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
4027             "%s.yz", src0_param.reg_name);
4028 }
4029
4030 /** Process the WINED3DSIO_TEXREG2RGB instruction in GLSL
4031  * Sample texture at dst using the rgb (xyz) components of src as texture coordinates */
4032 static void shader_glsl_texreg2rgb(const struct wined3d_shader_instruction *ins)
4033 {
4034     DWORD sampler_idx = ins->dst[0].reg.idx[0].offset;
4035     struct glsl_sample_function sample_function;
4036     struct glsl_src_param src0_param;
4037
4038     /* Dependent read, not valid with conditional NP2 */
4039     shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function);
4040     shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &src0_param);
4041
4042     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
4043             "%s", src0_param.param_str);
4044 }
4045
4046 /** Process the WINED3DSIO_TEXKILL instruction in GLSL.
4047  * If any of the first 3 components are < 0, discard this pixel */
4048 static void shader_glsl_texkill(const struct wined3d_shader_instruction *ins)
4049 {
4050     struct glsl_dst_param dst_param;
4051
4052     /* The argument is a destination parameter, and no writemasks are allowed */
4053     shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
4054     if (ins->ctx->reg_maps->shader_version.major >= 2)
4055     {
4056         /* 2.0 shaders compare all 4 components in texkill */
4057         shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyzw, vec4(0.0)))) discard;\n", dst_param.reg_name);
4058     } else {
4059         /* 1.X shaders only compare the first 3 components, probably due to the nature of the texkill
4060          * instruction as a tex* instruction, and phase, which kills all a / w components. Even if all
4061          * 4 components are defined, only the first 3 are used
4062          */
4063         shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;\n", dst_param.reg_name);
4064     }
4065 }
4066
4067 /** Process the WINED3DSIO_DP2ADD instruction in GLSL.
4068  * dst = dot2(src0, src1) + src2 */
4069 static void shader_glsl_dp2add(const struct wined3d_shader_instruction *ins)
4070 {
4071     struct glsl_src_param src0_param;
4072     struct glsl_src_param src1_param;
4073     struct glsl_src_param src2_param;
4074     DWORD write_mask;
4075     unsigned int mask_size;
4076
4077     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
4078     mask_size = shader_glsl_get_write_mask_size(write_mask);
4079
4080     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
4081     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
4082     shader_glsl_add_src_param(ins, &ins->src[2], WINED3DSP_WRITEMASK_0, &src2_param);
4083
4084     if (mask_size > 1) {
4085         shader_addline(ins->ctx->buffer, "vec%d(dot(%s, %s) + %s));\n",
4086                 mask_size, src0_param.param_str, src1_param.param_str, src2_param.param_str);
4087     } else {
4088         shader_addline(ins->ctx->buffer, "dot(%s, %s) + %s);\n",
4089                 src0_param.param_str, src1_param.param_str, src2_param.param_str);
4090     }
4091 }
4092
4093 static void shader_glsl_input_pack(const struct wined3d_shader *shader, struct wined3d_shader_buffer *buffer,
4094         const struct wined3d_shader_signature_element *input_signature,
4095         const struct wined3d_shader_reg_maps *reg_maps,
4096         enum vertexprocessing_mode vertexprocessing)
4097 {
4098     WORD map = reg_maps->input_registers;
4099     unsigned int i;
4100
4101     for (i = 0; map; map >>= 1, ++i)
4102     {
4103         const char *semantic_name;
4104         UINT semantic_idx;
4105         char reg_mask[6];
4106
4107         /* Unused */
4108         if (!(map & 1)) continue;
4109
4110         semantic_name = input_signature[i].semantic_name;
4111         semantic_idx = input_signature[i].semantic_idx;
4112         shader_glsl_write_mask_to_str(input_signature[i].mask, reg_mask);
4113
4114         if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
4115         {
4116             if (semantic_idx < 8 && vertexprocessing == pretransformed)
4117                 shader_addline(buffer, "ps_in[%u]%s = gl_TexCoord[%u]%s;\n",
4118                         shader->u.ps.input_reg_map[i], reg_mask, semantic_idx, reg_mask);
4119             else
4120                 shader_addline(buffer, "ps_in[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
4121                         shader->u.ps.input_reg_map[i], reg_mask, reg_mask);
4122         }
4123         else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_COLOR))
4124         {
4125             if (!semantic_idx)
4126                 shader_addline(buffer, "ps_in[%u]%s = vec4(gl_Color)%s;\n",
4127                         shader->u.ps.input_reg_map[i], reg_mask, reg_mask);
4128             else if (semantic_idx == 1)
4129                 shader_addline(buffer, "ps_in[%u]%s = vec4(gl_SecondaryColor)%s;\n",
4130                         shader->u.ps.input_reg_map[i], reg_mask, reg_mask);
4131             else
4132                 shader_addline(buffer, "ps_in[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
4133                         shader->u.ps.input_reg_map[i], reg_mask, reg_mask);
4134         }
4135         else
4136         {
4137             shader_addline(buffer, "ps_in[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
4138                     shader->u.ps.input_reg_map[i], reg_mask, reg_mask);
4139         }
4140     }
4141 }
4142
4143 /*********************************************
4144  * Vertex Shader Specific Code begins here
4145  ********************************************/
4146
4147 static void add_glsl_program_entry(struct shader_glsl_priv *priv, struct glsl_shader_prog_link *entry)
4148 {
4149     struct glsl_program_key key;
4150
4151     key.vs_id = entry->vs.id;
4152     key.gs_id = entry->gs.id;
4153     key.ps_id = entry->ps.id;
4154
4155     if (wine_rb_put(&priv->program_lookup, &key, &entry->program_lookup_entry) == -1)
4156     {
4157         ERR("Failed to insert program entry.\n");
4158     }
4159 }
4160
4161 static struct glsl_shader_prog_link *get_glsl_program_entry(const struct shader_glsl_priv *priv,
4162         GLhandleARB vs_id, GLhandleARB gs_id, GLhandleARB ps_id)
4163 {
4164     struct wine_rb_entry *entry;
4165     struct glsl_program_key key;
4166
4167     key.vs_id = vs_id;
4168     key.gs_id = gs_id;
4169     key.ps_id = ps_id;
4170
4171     entry = wine_rb_get(&priv->program_lookup, &key);
4172     return entry ? WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry) : NULL;
4173 }
4174
4175 /* Context activation is done by the caller. */
4176 static void delete_glsl_program_entry(struct shader_glsl_priv *priv, const struct wined3d_gl_info *gl_info,
4177         struct glsl_shader_prog_link *entry)
4178 {
4179     struct glsl_program_key key;
4180
4181     key.vs_id = entry->vs.id;
4182     key.gs_id = entry->gs.id;
4183     key.ps_id = entry->ps.id;
4184     wine_rb_remove(&priv->program_lookup, &key);
4185
4186     GL_EXTCALL(glDeleteObjectARB(entry->programId));
4187     if (entry->vs.id)
4188         list_remove(&entry->vs.shader_entry);
4189     if (entry->gs.id)
4190         list_remove(&entry->gs.shader_entry);
4191     if (entry->ps.id)
4192         list_remove(&entry->ps.shader_entry);
4193     HeapFree(GetProcessHeap(), 0, entry->vs.uniform_f_locations);
4194     HeapFree(GetProcessHeap(), 0, entry->ps.uniform_f_locations);
4195     HeapFree(GetProcessHeap(), 0, entry);
4196 }
4197
4198 static void handle_ps3_input(struct wined3d_shader_buffer *buffer,
4199         const struct wined3d_gl_info *gl_info, const DWORD *map,
4200         const struct wined3d_shader_signature_element *input_signature,
4201         const struct wined3d_shader_reg_maps *reg_maps_in,
4202         const struct wined3d_shader_signature_element *output_signature,
4203         const struct wined3d_shader_reg_maps *reg_maps_out)
4204 {
4205     unsigned int i, j;
4206     const char *semantic_name_in;
4207     UINT semantic_idx_in;
4208     DWORD *set;
4209     DWORD in_idx;
4210     unsigned int in_count = vec4_varyings(3, gl_info);
4211     char reg_mask[6];
4212     char destination[50];
4213     WORD input_map, output_map;
4214
4215     set = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*set) * (in_count + 2));
4216
4217     input_map = reg_maps_in->input_registers;
4218     for (i = 0; input_map; input_map >>= 1, ++i)
4219     {
4220         if (!(input_map & 1)) continue;
4221
4222         in_idx = map[i];
4223         /* Declared, but not read register */
4224         if (in_idx == ~0U) continue;
4225         if (in_idx >= (in_count + 2))
4226         {
4227             FIXME("More input varyings declared than supported, expect issues.\n");
4228             continue;
4229         }
4230
4231         if (in_idx == in_count)
4232             sprintf(destination, "gl_FrontColor");
4233         else if (in_idx == in_count + 1)
4234             sprintf(destination, "gl_FrontSecondaryColor");
4235         else
4236             sprintf(destination, "ps_in[%u]", in_idx);
4237
4238         semantic_name_in = input_signature[i].semantic_name;
4239         semantic_idx_in = input_signature[i].semantic_idx;
4240         set[in_idx] = ~0U;
4241
4242         output_map = reg_maps_out->output_registers;
4243         for (j = 0; output_map; output_map >>= 1, ++j)
4244         {
4245             DWORD mask;
4246
4247             if (!(output_map & 1)
4248                     || semantic_idx_in != output_signature[j].semantic_idx
4249                     || strcmp(semantic_name_in, output_signature[j].semantic_name)
4250                     || !(mask = input_signature[i].mask & output_signature[j].mask))
4251                 continue;
4252
4253             set[in_idx] = mask;
4254             shader_glsl_write_mask_to_str(mask, reg_mask);
4255
4256             shader_addline(buffer, "%s%s = vs_out[%u]%s;\n",
4257                     destination, reg_mask, j, reg_mask);
4258         }
4259     }
4260
4261     for (i = 0; i < in_count + 2; ++i)
4262     {
4263         unsigned int size;
4264
4265         if (!set[i] || set[i] == WINED3DSP_WRITEMASK_ALL)
4266             continue;
4267
4268         if (set[i] == ~0U) set[i] = 0;
4269
4270         size = 0;
4271         if (!(set[i] & WINED3DSP_WRITEMASK_0)) reg_mask[size++] = 'x';
4272         if (!(set[i] & WINED3DSP_WRITEMASK_1)) reg_mask[size++] = 'y';
4273         if (!(set[i] & WINED3DSP_WRITEMASK_2)) reg_mask[size++] = 'z';
4274         if (!(set[i] & WINED3DSP_WRITEMASK_3)) reg_mask[size++] = 'w';
4275         reg_mask[size] = '\0';
4276
4277         if (i == in_count)
4278             sprintf(destination, "gl_FrontColor");
4279         else if (i == in_count + 1)
4280             sprintf(destination, "gl_FrontSecondaryColor");
4281         else
4282             sprintf(destination, "ps_in[%u]", i);
4283
4284         if (size == 1) shader_addline(buffer, "%s.%s = 0.0;\n", destination, reg_mask);
4285         else shader_addline(buffer, "%s.%s = vec%u(0.0);\n", destination, reg_mask, size);
4286     }
4287
4288     HeapFree(GetProcessHeap(), 0, set);
4289 }
4290
4291 /* Context activation is done by the caller. */
4292 static GLhandleARB generate_param_reorder_function(struct wined3d_shader_buffer *buffer,
4293         const struct wined3d_shader *vs, const struct wined3d_shader *ps,
4294         const struct wined3d_gl_info *gl_info)
4295 {
4296     GLhandleARB ret = 0;
4297     DWORD ps_major = ps ? ps->reg_maps.shader_version.major : 0;
4298     unsigned int i;
4299     const char *semantic_name;
4300     UINT semantic_idx;
4301     char reg_mask[6];
4302     const struct wined3d_shader_signature_element *output_signature = vs->output_signature;
4303     WORD map = vs->reg_maps.output_registers;
4304
4305     shader_buffer_clear(buffer);
4306
4307     shader_addline(buffer, "#version 120\n");
4308
4309     if (ps_major < 3)
4310     {
4311         shader_addline(buffer, "void order_ps_input(in vec4 vs_out[%u])\n{\n", vs->limits.packed_output);
4312
4313         for (i = 0; map; map >>= 1, ++i)
4314         {
4315             DWORD write_mask;
4316
4317             if (!(map & 1)) continue;
4318
4319             semantic_name = output_signature[i].semantic_name;
4320             semantic_idx = output_signature[i].semantic_idx;
4321             write_mask = output_signature[i].mask;
4322             shader_glsl_write_mask_to_str(write_mask, reg_mask);
4323
4324             if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_COLOR))
4325             {
4326                 if (!semantic_idx)
4327                     shader_addline(buffer, "gl_FrontColor%s = vs_out[%u]%s;\n",
4328                             reg_mask, i, reg_mask);
4329                 else if (semantic_idx == 1)
4330                     shader_addline(buffer, "gl_FrontSecondaryColor%s = vs_out[%u]%s;\n",
4331                             reg_mask, i, reg_mask);
4332             }
4333             else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_POSITION))
4334             {
4335                 shader_addline(buffer, "gl_Position%s = vs_out[%u]%s;\n",
4336                         reg_mask, i, reg_mask);
4337             }
4338             else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
4339             {
4340                 if (semantic_idx < 8)
4341                 {
4342                     if (!(gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) || ps_major > 0)
4343                         write_mask |= WINED3DSP_WRITEMASK_3;
4344
4345                     shader_addline(buffer, "gl_TexCoord[%u]%s = vs_out[%u]%s;\n",
4346                             semantic_idx, reg_mask, i, reg_mask);
4347                     if (!(write_mask & WINED3DSP_WRITEMASK_3))
4348                         shader_addline(buffer, "gl_TexCoord[%u].w = 1.0;\n", semantic_idx);
4349                 }
4350             }
4351             else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_PSIZE))
4352             {
4353                 shader_addline(buffer, "gl_PointSize = vs_out[%u].%c;\n", i, reg_mask[1]);
4354             }
4355             else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_FOG))
4356             {
4357                 shader_addline(buffer, "gl_FogFragCoord = clamp(vs_out[%u].%c, 0.0, 1.0);\n", i, reg_mask[1]);
4358             }
4359         }
4360         shader_addline(buffer, "}\n");
4361     }
4362     else
4363     {
4364         UINT in_count = min(vec4_varyings(ps_major, gl_info), ps->limits.packed_input);
4365         /* This one is tricky: a 3.0 pixel shader reads from a 3.0 vertex shader */
4366         shader_addline(buffer, "varying vec4 ps_in[%u];\n", in_count);
4367         shader_addline(buffer, "void order_ps_input(in vec4 vs_out[%u])\n{\n", vs->limits.packed_output);
4368
4369         /* First, sort out position and point size. Those are not passed to the pixel shader */
4370         for (i = 0; map; map >>= 1, ++i)
4371         {
4372             if (!(map & 1)) continue;
4373
4374             semantic_name = output_signature[i].semantic_name;
4375             shader_glsl_write_mask_to_str(output_signature[i].mask, reg_mask);
4376
4377             if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_POSITION))
4378             {
4379                 shader_addline(buffer, "gl_Position%s = vs_out[%u]%s;\n",
4380                         reg_mask, i, reg_mask);
4381             }
4382             else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_PSIZE))
4383             {
4384                 shader_addline(buffer, "gl_PointSize = vs_out[%u].%c;\n", i, reg_mask[1]);
4385             }
4386         }
4387
4388         /* Then, fix the pixel shader input */
4389         handle_ps3_input(buffer, gl_info, ps->u.ps.input_reg_map, ps->input_signature,
4390                 &ps->reg_maps, output_signature, &vs->reg_maps);
4391
4392         shader_addline(buffer, "}\n");
4393     }
4394
4395     ret = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
4396     checkGLcall("glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)");
4397     shader_glsl_compile(gl_info, ret, buffer->buffer);
4398
4399     return ret;
4400 }
4401
/* Append the GLSL statements that apply sRGB write correction to
 * gl_FragData[0].
 *
 * The emitted code computes a power-curve value in tmp0 and a linearly scaled
 * value in tmp1, picks tmp1 for every channel that is below srgb_const1.x and
 * tmp0 otherwise, and finally clamps the whole output to [0.0, 1.0]. The
 * names tmp0, tmp1, srgb_const0 and srgb_const1 are only referenced here;
 * they have to be declared by whoever generates the rest of the shader. */
static void shader_glsl_generate_srgb_write_correction(struct wined3d_shader_buffer *buffer)
{
    static const char * const statements[] =
    {
        "tmp0.xyz = pow(gl_FragData[0].xyz, vec3(srgb_const0.x));\n",
        "tmp0.xyz = tmp0.xyz * vec3(srgb_const0.y) - vec3(srgb_const0.z);\n",
        "tmp1.xyz = gl_FragData[0].xyz * vec3(srgb_const0.w);\n",
        "bvec3 srgb_compare = lessThan(gl_FragData[0].xyz, vec3(srgb_const1.x));\n",
        "gl_FragData[0].xyz = mix(tmp0.xyz, tmp1.xyz, vec3(srgb_compare));\n",
        "gl_FragData[0] = clamp(gl_FragData[0], 0.0, 1.0);\n",
    };
    unsigned int idx;

    /* One shader_addline() call per statement, in declaration order. */
    for (idx = 0; idx < sizeof(statements) / sizeof(*statements); ++idx)
        shader_addline(buffer, "%s", statements[idx]);
}
4411
4412 static void shader_glsl_generate_fog_code(struct wined3d_shader_buffer *buffer, enum fogmode mode)
4413 {
4414     switch (mode)
4415     {
4416         case FOG_OFF:
4417             return;
4418
4419         case FOG_LINEAR:
4420             /* Fog = (gl_Fog.end - gl_FogFragCoord) / (gl_Fog.end - gl_Fog.start) */
4421             shader_addline(buffer, "float Fog = (gl_Fog.end - gl_FogFragCoord) / (gl_Fog.end - gl_Fog.start);\n");
4422             break;
4423
4424         case FOG_EXP:
4425             /* Fog = e^-(gl_Fog.density * gl_FogFragCoord) */
4426             shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_FogFragCoord);\n");
4427             break;
4428
4429         case FOG_EXP2:
4430             /* Fog = e^-((gl_Fog.density * gl_FogFragCoord)^2) */
4431             shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_Fog.density * gl_FogFragCoord * gl_FogFragCoord);\n");
4432             break;
4433
4434         default:
4435             ERR("Invalid fog mode %#x.\n", mode);
4436             return;
4437     }
4438
4439     shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, clamp(Fog, 0.0, 1.0));\n");
4440 }
4441
4442 /* Context activation is done by the caller. */
4443 static GLuint shader_glsl_generate_pshader(const struct wined3d_context *context,
4444         struct wined3d_shader_buffer *buffer, const struct wined3d_shader *shader,
4445         const struct ps_compile_args *args, struct ps_np2fixup_info *np2fixup_info)
4446 {
4447     const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
4448     const struct wined3d_gl_info *gl_info = context->gl_info;
4449     const DWORD *function = shader->function;
4450     struct shader_glsl_ctx_priv priv_ctx;
4451
4452     /* Create the hw GLSL shader object and assign it as the shader->prgId */
4453     GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
4454
4455     memset(&priv_ctx, 0, sizeof(priv_ctx));
4456     priv_ctx.cur_ps_args = args;
4457     priv_ctx.cur_np2fixup_info = np2fixup_info;
4458
4459     shader_addline(buffer, "#version 120\n");
4460
4461     if (gl_info->supported[ARB_SHADER_BIT_ENCODING])
4462         shader_addline(buffer, "#extension GL_ARB_shader_bit_encoding : enable\n");
4463     if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
4464         shader_addline(buffer, "#extension GL_ARB_shader_texture_lod : enable\n");
4465     /* The spec says that it doesn't have to be explicitly enabled, but the
4466      * nvidia drivers write a warning if we don't do so. */
4467     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
4468         shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n");
4469     if (gl_info->supported[EXT_GPU_SHADER4])
4470         shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n");
4471
4472     /* Base Declarations */
4473     shader_generate_glsl_declarations(context, buffer, shader, reg_maps, &priv_ctx);
4474
4475     /* Pack 3.0 inputs */
4476     if (reg_maps->shader_version.major >= 3 && args->vp_mode != vertexshader)
4477         shader_glsl_input_pack(shader, buffer, shader->input_signature, reg_maps, args->vp_mode);
4478
4479     /* Base Shader Body */
4480     shader_generate_main(shader, buffer, reg_maps, function, &priv_ctx);
4481
4482     /* Pixel shaders < 2.0 place the resulting color in R0 implicitly */
4483     if (reg_maps->shader_version.major < 2)
4484     {
4485         /* Some older cards like GeforceFX ones don't support multiple buffers, so also not gl_FragData */
4486         shader_addline(buffer, "gl_FragData[0] = R0;\n");
4487     }
4488
4489     if (args->srgb_correction)
4490         shader_glsl_generate_srgb_write_correction(buffer);
4491
4492     /* SM < 3 does not replace the fog stage. */
4493     if (reg_maps->shader_version.major < 3)
4494         shader_glsl_generate_fog_code(buffer, args->fog);
4495
4496     shader_addline(buffer, "}\n");
4497
4498     TRACE("Compiling shader object %u\n", shader_obj);
4499     shader_glsl_compile(gl_info, shader_obj, buffer->buffer);
4500
4501     /* Store the shader object */
4502     return shader_obj;
4503 }
4504
4505 /* Context activation is done by the caller. */
4506 static GLuint shader_glsl_generate_vshader(const struct wined3d_context *context,
4507         struct wined3d_shader_buffer *buffer, const struct wined3d_shader *shader,
4508         const struct vs_compile_args *args)
4509 {
4510     const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
4511     const struct wined3d_gl_info *gl_info = context->gl_info;
4512     const DWORD *function = shader->function;
4513     struct shader_glsl_ctx_priv priv_ctx;
4514
4515     /* Create the hw GLSL shader program and assign it as the shader->prgId */
4516     GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
4517
4518     shader_addline(buffer, "#version 120\n");
4519
4520     if (gl_info->supported[ARB_SHADER_BIT_ENCODING])
4521         shader_addline(buffer, "#extension GL_ARB_shader_bit_encoding : enable\n");
4522     if (gl_info->supported[EXT_GPU_SHADER4])
4523         shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n");
4524
4525     memset(&priv_ctx, 0, sizeof(priv_ctx));
4526     priv_ctx.cur_vs_args = args;
4527
4528     /* Base Declarations */
4529     shader_generate_glsl_declarations(context, buffer, shader, reg_maps, &priv_ctx);
4530
4531     /* Base Shader Body */
4532     shader_generate_main(shader, buffer, reg_maps, function, &priv_ctx);
4533
4534     /* Unpack outputs */
4535     shader_addline(buffer, "order_ps_input(vs_out);\n");
4536
4537     /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
4538      * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
4539      * the fog frag coord is thrown away. If the fog frag coord is used, but not written by
4540      * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
4541      */
4542     if (args->fog_src == VS_FOG_Z)
4543         shader_addline(buffer, "gl_FogFragCoord = gl_Position.z;\n");
4544     else if (!reg_maps->fog)
4545         shader_addline(buffer, "gl_FogFragCoord = 0.0;\n");
4546
4547     /* We always store the clipplanes without y inversion */
4548     if (args->clip_enabled)
4549         shader_addline(buffer, "gl_ClipVertex = gl_Position;\n");
4550
4551     /* Write the final position.
4552      *
4553      * OpenGL coordinates specify the center of the pixel while d3d coords specify
4554      * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
4555      * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
4556      * contains 1.0 to allow a mad.
4557      */
4558     shader_addline(buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n");
4559     shader_addline(buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n");
4560
4561     /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
4562      *
4563      * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However, shaders are run
4564      * before the homogeneous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
4565      * which is the same as z = z * 2 - w.
4566      */
4567     shader_addline(buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
4568
4569     shader_addline(buffer, "}\n");
4570
4571     TRACE("Compiling shader object %u\n", shader_obj);
4572     shader_glsl_compile(gl_info, shader_obj, buffer->buffer);
4573
4574     return shader_obj;
4575 }
4576
4577 /* Context activation is done by the caller. */
4578 static GLhandleARB shader_glsl_generate_geometry_shader(const struct wined3d_context *context,
4579         struct wined3d_shader_buffer *buffer, const struct wined3d_shader *shader)
4580 {
4581     const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
4582     const struct wined3d_gl_info *gl_info = context->gl_info;
4583     const DWORD *function = shader->function;
4584     struct shader_glsl_ctx_priv priv_ctx;
4585     GLhandleARB shader_id;
4586
4587     shader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_GEOMETRY_SHADER_ARB));
4588
4589     shader_addline(buffer, "#version 120\n");
4590
4591     if (gl_info->supported[ARB_GEOMETRY_SHADER4])
4592         shader_addline(buffer, "#extension GL_ARB_geometry_shader4 : enable\n");
4593     if (gl_info->supported[ARB_SHADER_BIT_ENCODING])
4594         shader_addline(buffer, "#extension GL_ARB_shader_bit_encoding : enable\n");
4595     if (gl_info->supported[EXT_GPU_SHADER4])
4596         shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n");
4597
4598     memset(&priv_ctx, 0, sizeof(priv_ctx));
4599     shader_generate_glsl_declarations(context, buffer, shader, reg_maps, &priv_ctx);
4600     shader_generate_main(shader, buffer, reg_maps, function, &priv_ctx);
4601     shader_addline(buffer, "}\n");
4602
4603     TRACE("Compiling shader object %u.\n", shader_id);
4604     shader_glsl_compile(gl_info, shader_id, buffer->buffer);
4605
4606     return shader_id;
4607 }
4608
4609 static GLhandleARB find_glsl_pshader(const struct wined3d_context *context,
4610         struct wined3d_shader_buffer *buffer, struct wined3d_shader *shader,
4611         const struct ps_compile_args *args, const struct ps_np2fixup_info **np2fixup_info)
4612 {
4613     struct glsl_ps_compiled_shader *gl_shaders, *new_array;
4614     struct glsl_shader_private *shader_data;
4615     struct ps_np2fixup_info *np2fixup;
4616     UINT i;
4617     DWORD new_size;
4618     GLhandleARB ret;
4619
4620     if (!shader->backend_data)
4621     {
4622         shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
4623         if (!shader->backend_data)
4624         {
4625             ERR("Failed to allocate backend data.\n");
4626             return 0;
4627         }
4628     }
4629     shader_data = shader->backend_data;
4630     gl_shaders = shader_data->gl_shaders.ps;
4631
4632     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
4633      * so a linear search is more performant than a hashmap or a binary search
4634      * (cache coherency etc)
4635      */
4636     for (i = 0; i < shader_data->num_gl_shaders; ++i)
4637     {
4638         if (!memcmp(&gl_shaders[i].args, args, sizeof(*args)))
4639         {
4640             if (args->np2_fixup)
4641                 *np2fixup_info = &gl_shaders[i].np2fixup;
4642             return gl_shaders[i].prgId;
4643         }
4644     }
4645
4646     TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
4647     if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
4648         if (shader_data->num_gl_shaders)
4649         {
4650             new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4651             new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders.ps,
4652                     new_size * sizeof(*gl_shaders));
4653         }
4654         else
4655         {
4656             new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*gl_shaders));
4657             new_size = 1;
4658         }
4659
4660         if(!new_array) {
4661             ERR("Out of memory\n");
4662             return 0;
4663         }
4664         shader_data->gl_shaders.ps = new_array;
4665         shader_data->shader_array_size = new_size;
4666         gl_shaders = new_array;
4667     }
4668
4669     gl_shaders[shader_data->num_gl_shaders].args = *args;
4670
4671     np2fixup = &gl_shaders[shader_data->num_gl_shaders].np2fixup;
4672     memset(np2fixup, 0, sizeof(*np2fixup));
4673     *np2fixup_info = args->np2_fixup ? np2fixup : NULL;
4674
4675     pixelshader_update_samplers(shader, args->tex_types);
4676
4677     shader_buffer_clear(buffer);
4678     ret = shader_glsl_generate_pshader(context, buffer, shader, args, np2fixup);
4679     gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
4680
4681     return ret;
4682 }
4683
4684 static inline BOOL vs_args_equal(const struct vs_compile_args *stored, const struct vs_compile_args *new,
4685                                  const DWORD use_map) {
4686     if((stored->swizzle_map & use_map) != new->swizzle_map) return FALSE;
4687     if((stored->clip_enabled) != new->clip_enabled) return FALSE;
4688     return stored->fog_src == new->fog_src;
4689 }
4690
4691 static GLhandleARB find_glsl_vshader(const struct wined3d_context *context,
4692         struct wined3d_shader_buffer *buffer, struct wined3d_shader *shader,
4693         const struct vs_compile_args *args)
4694 {
4695     UINT i;
4696     DWORD new_size;
4697     DWORD use_map = shader->device->stream_info.use_map;
4698     struct glsl_vs_compiled_shader *gl_shaders, *new_array;
4699     struct glsl_shader_private *shader_data;
4700     GLhandleARB ret;
4701
4702     if (!shader->backend_data)
4703     {
4704         shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
4705         if (!shader->backend_data)
4706         {
4707             ERR("Failed to allocate backend data.\n");
4708             return 0;
4709         }
4710     }
4711     shader_data = shader->backend_data;
4712     gl_shaders = shader_data->gl_shaders.vs;
4713
4714     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
4715      * so a linear search is more performant than a hashmap or a binary search
4716      * (cache coherency etc)
4717      */
4718     for (i = 0; i < shader_data->num_gl_shaders; ++i)
4719     {
4720         if (vs_args_equal(&gl_shaders[i].args, args, use_map))
4721             return gl_shaders[i].prgId;
4722     }
4723
4724     TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
4725
4726     if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
4727         if (shader_data->num_gl_shaders)
4728         {
4729             new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4730             new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders.vs,
4731                     new_size * sizeof(*gl_shaders));
4732         }
4733         else
4734         {
4735             new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*gl_shaders));
4736             new_size = 1;
4737         }
4738
4739         if(!new_array) {
4740             ERR("Out of memory\n");
4741             return 0;
4742         }
4743         shader_data->gl_shaders.vs = new_array;
4744         shader_data->shader_array_size = new_size;
4745         gl_shaders = new_array;
4746     }
4747
4748     gl_shaders[shader_data->num_gl_shaders].args = *args;
4749
4750     shader_buffer_clear(buffer);
4751     ret = shader_glsl_generate_vshader(context, buffer, shader, args);
4752     gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
4753
4754     return ret;
4755 }
4756
4757 static GLhandleARB find_glsl_geometry_shader(const struct wined3d_context *context,
4758         struct wined3d_shader_buffer *buffer, struct wined3d_shader *shader)
4759 {
4760     struct glsl_gs_compiled_shader *gl_shaders;
4761     struct glsl_shader_private *shader_data;
4762     GLhandleARB ret;
4763
4764     if (!shader->backend_data)
4765     {
4766         if (!(shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data))))
4767         {
4768             ERR("Failed to allocate backend data.\n");
4769             return 0;
4770         }
4771     }
4772     shader_data = shader->backend_data;
4773     gl_shaders = shader_data->gl_shaders.gs;
4774
4775     if (shader_data->num_gl_shaders)
4776         return gl_shaders[0].id;
4777
4778     TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
4779
4780     if (!(shader_data->gl_shaders.gs = HeapAlloc(GetProcessHeap(), 0, sizeof(*gl_shaders))))
4781     {
4782         ERR("Failed to allocate GL shader array.\n");
4783         return 0;
4784     }
4785     shader_data->shader_array_size = 1;
4786     gl_shaders = shader_data->gl_shaders.gs;
4787
4788     shader_buffer_clear(buffer);
4789     ret = shader_glsl_generate_geometry_shader(context, buffer, shader);
4790     gl_shaders[shader_data->num_gl_shaders++].id = ret;
4791
4792     return ret;
4793 }
4794
4795 static const char *shader_glsl_get_ffp_fragment_op_arg(struct wined3d_shader_buffer *buffer,
4796         DWORD argnum, unsigned int stage, DWORD arg)
4797 {
4798     const char *ret;
4799
4800     if (arg == ARG_UNUSED)
4801         return "<unused arg>";
4802
4803     switch (arg & WINED3DTA_SELECTMASK)
4804     {
4805         case WINED3DTA_DIFFUSE:
4806             ret = "gl_Color";
4807             break;
4808
4809         case WINED3DTA_CURRENT:
4810             if (!stage)
4811                 ret = "gl_Color";
4812             else
4813                 ret = "ret";
4814             break;
4815
4816         case WINED3DTA_TEXTURE:
4817             switch (stage)
4818             {
4819                 case 0: ret = "tex0"; break;
4820                 case 1: ret = "tex1"; break;
4821                 case 2: ret = "tex2"; break;
4822                 case 3: ret = "tex3"; break;
4823                 case 4: ret = "tex4"; break;
4824                 case 5: ret = "tex5"; break;
4825                 case 6: ret = "tex6"; break;
4826                 case 7: ret = "tex7"; break;
4827                 default:
4828                     ret = "<invalid texture>";
4829                     break;
4830             }
4831             break;
4832
4833         case WINED3DTA_TFACTOR:
4834             ret = "tex_factor";
4835             break;
4836
4837         case WINED3DTA_SPECULAR:
4838             ret = "gl_SecondaryColor";
4839             break;
4840
4841         case WINED3DTA_TEMP:
4842             ret = "temp_reg";
4843             break;
4844
4845         case WINED3DTA_CONSTANT:
4846             FIXME("Per-stage constants not implemented.\n");
4847             switch (stage)
4848             {
4849                 case 0: ret = "const0"; break;
4850                 case 1: ret = "const1"; break;
4851                 case 2: ret = "const2"; break;
4852                 case 3: ret = "const3"; break;
4853                 case 4: ret = "const4"; break;
4854                 case 5: ret = "const5"; break;
4855                 case 6: ret = "const6"; break;
4856                 case 7: ret = "const7"; break;
4857                 default:
4858                     ret = "<invalid constant>";
4859                     break;
4860             }
4861             break;
4862
4863         default:
4864             return "<unhandled arg>";
4865     }
4866
4867     if (arg & WINED3DTA_COMPLEMENT)
4868     {
4869         shader_addline(buffer, "arg%u = vec4(1.0) - %s;\n", argnum, ret);
4870         if (argnum == 0)
4871             ret = "arg0";
4872         else if (argnum == 1)
4873             ret = "arg1";
4874         else if (argnum == 2)
4875             ret = "arg2";
4876     }
4877
4878     if (arg & WINED3DTA_ALPHAREPLICATE)
4879     {
4880         shader_addline(buffer, "arg%u = vec4(%s.w);\n", argnum, ret);
4881         if (argnum == 0)
4882             ret = "arg0";
4883         else if (argnum == 1)
4884             ret = "arg1";
4885         else if (argnum == 2)
4886             ret = "arg2";
4887     }
4888
4889     return ret;
4890 }
4891
4892 static void shader_glsl_ffp_fragment_op(struct wined3d_shader_buffer *buffer, unsigned int stage, BOOL color,
4893         BOOL alpha, DWORD dst, DWORD op, DWORD dw_arg0, DWORD dw_arg1, DWORD dw_arg2)
4894 {
4895     const char *dstmask, *dstreg, *arg0, *arg1, *arg2;
4896
4897     if (color && alpha)
4898         dstmask = "";
4899     else if (color)
4900         dstmask = ".xyz";
4901     else
4902         dstmask = ".w";
4903
4904     if (dst == tempreg)
4905         dstreg = "temp_reg";
4906     else
4907         dstreg = "ret";
4908
4909     arg0 = shader_glsl_get_ffp_fragment_op_arg(buffer, 0, stage, dw_arg0);
4910     arg1 = shader_glsl_get_ffp_fragment_op_arg(buffer, 1, stage, dw_arg1);
4911     arg2 = shader_glsl_get_ffp_fragment_op_arg(buffer, 2, stage, dw_arg2);
4912
4913     switch (op)
4914     {
4915         case WINED3D_TOP_DISABLE:
4916             if (!stage)
4917                 shader_addline(buffer, "%s%s = gl_Color%s;\n", dstreg, dstmask, dstmask);
4918             break;
4919
4920         case WINED3D_TOP_SELECT_ARG1:
4921             shader_addline(buffer, "%s%s = %s%s;\n", dstreg, dstmask, arg1, dstmask);
4922             break;
4923
4924         case WINED3D_TOP_SELECT_ARG2:
4925             shader_addline(buffer, "%s%s = %s%s;\n", dstreg, dstmask, arg2, dstmask);
4926             break;
4927
4928         case WINED3D_TOP_MODULATE:
4929             shader_addline(buffer, "%s%s = %s%s * %s%s;\n", dstreg, dstmask, arg1, dstmask, arg2, dstmask);
4930             break;
4931
4932         case WINED3D_TOP_MODULATE_4X:
4933             shader_addline(buffer, "%s%s = clamp(%s%s * %s%s * 4.0, 0.0, 1.0);\n",
4934                     dstreg, dstmask, arg1, dstmask, arg2, dstmask);
4935             break;
4936
4937         case WINED3D_TOP_MODULATE_2X:
4938             shader_addline(buffer, "%s%s = clamp(%s%s * %s%s * 2.0, 0.0, 1.0);\n",
4939                     dstreg, dstmask, arg1, dstmask, arg2, dstmask);
4940             break;
4941
4942         case WINED3D_TOP_ADD:
4943             shader_addline(buffer, "%s%s = clamp(%s%s + %s%s, 0.0, 1.0);\n",
4944                     dstreg, dstmask, arg1, dstmask, arg2, dstmask);
4945             break;
4946
4947         case WINED3D_TOP_ADD_SIGNED:
4948             shader_addline(buffer, "%s%s = clamp(%s%s + (%s - vec4(0.5))%s, 0.0, 1.0);\n",
4949                     dstreg, dstmask, arg1, dstmask, arg2, dstmask);
4950             break;
4951
4952         case WINED3D_TOP_ADD_SIGNED_2X:
4953             shader_addline(buffer, "%s%s = clamp((%s%s + (%s - vec4(0.5))%s) * 2.0, 0.0, 1.0);\n",
4954                     dstreg, dstmask, arg1, dstmask, arg2, dstmask);
4955             break;
4956
4957         case WINED3D_TOP_SUBTRACT:
4958             shader_addline(buffer, "%s%s = clamp(%s%s - %s%s, 0.0, 1.0);\n",
4959                     dstreg, dstmask, arg1, dstmask, arg2, dstmask);
4960             break;
4961
4962         case WINED3D_TOP_ADD_SMOOTH:
4963             shader_addline(buffer, "%s%s = clamp((vec4(1.0) - %s)%s * %s%s + %s%s, 0.0, 1.0);\n",
4964                     dstreg, dstmask, arg1, dstmask, arg2, dstmask, arg1, dstmask);
4965             break;
4966
4967         case WINED3D_TOP_BLEND_DIFFUSE_ALPHA:
4968             arg0 = shader_glsl_get_ffp_fragment_op_arg(buffer, 0, stage, WINED3DTA_DIFFUSE);
4969             shader_addline(buffer, "%s%s = mix(%s%s, %s%s, %s.w);\n",
4970                     dstreg, dstmask, arg2, dstmask, arg1, dstmask, arg0);
4971             break;
4972
4973         case WINED3D_TOP_BLEND_TEXTURE_ALPHA:
4974             arg0 = shader_glsl_get_ffp_fragment_op_arg(buffer, 0, stage, WINED3DTA_TEXTURE);
4975             shader_addline(buffer, "%s%s = mix(%s%s, %s%s, %s.w);\n",
4976                     dstreg, dstmask, arg2, dstmask, arg1, dstmask, arg0);
4977             break;
4978
4979         case WINED3D_TOP_BLEND_FACTOR_ALPHA:
4980             arg0 = shader_glsl_get_ffp_fragment_op_arg(buffer, 0, stage, WINED3DTA_TFACTOR);
4981             shader_addline(buffer, "%s%s = mix(%s%s, %s%s, %s.w);\n",
4982                     dstreg, dstmask, arg2, dstmask, arg1, dstmask, arg0);
4983             break;
4984
4985         case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM:
4986             arg0 = shader_glsl_get_ffp_fragment_op_arg(buffer, 0, stage, WINED3DTA_TEXTURE);
4987             shader_addline(buffer, "%s%s = clamp(%s%s * (1.0 - %s.w) + %s%s, 0.0, 1.0);\n",
4988                     dstreg, dstmask, arg2, dstmask, arg0, arg1, dstmask);
4989             break;
4990
4991         case WINED3D_TOP_BLEND_CURRENT_ALPHA:
4992             arg0 = shader_glsl_get_ffp_fragment_op_arg(buffer, 0, stage, WINED3DTA_CURRENT);
4993             shader_addline(buffer, "%s%s = mix(%s%s, %s%s, %s.w);\n",
4994                     dstreg, dstmask, arg2, dstmask, arg1, dstmask, arg0);
4995             break;
4996
4997         case WINED3D_TOP_MODULATE_ALPHA_ADD_COLOR:
4998             shader_addline(buffer, "%s%s = clamp(%s%s * %s.w + %s%s, 0.0, 1.0);\n",
4999                     dstreg, dstmask, arg2, dstmask, arg1, arg1, dstmask);
5000             break;
5001
5002         case WINED3D_TOP_MODULATE_COLOR_ADD_ALPHA:
5003             shader_addline(buffer, "%s%s = clamp(%s%s * %s%s + %s.w, 0.0, 1.0);\n",
5004                     dstreg, dstmask, arg1, dstmask, arg2, dstmask, arg1);
5005             break;
5006
5007         case WINED3D_TOP_MODULATE_INVALPHA_ADD_COLOR:
5008             shader_addline(buffer, "%s%s = clamp(%s%s * (1.0 - %s.w) + %s%s, 0.0, 1.0);\n",
5009                     dstreg, dstmask, arg2, dstmask, arg1, arg1, dstmask);
5010             break;
5011         case WINED3D_TOP_MODULATE_INVCOLOR_ADD_ALPHA:
5012             shader_addline(buffer, "%s%s = clamp((vec4(1.0) - %s)%s * %s%s + %s.w, 0.0, 1.0);\n",
5013                     dstreg, dstmask, arg1, dstmask, arg2, dstmask, arg1);
5014             break;
5015
5016         case WINED3D_TOP_BUMPENVMAP:
5017         case WINED3D_TOP_BUMPENVMAP_LUMINANCE:
5018             /* These are handled in the first pass, nothing to do. */
5019             break;
5020
5021         case WINED3D_TOP_DOTPRODUCT3:
5022             shader_addline(buffer, "%s%s = vec4(clamp(dot(%s.xyz - 0.5, %s.xyz - 0.5) * 4.0, 0.0, 1.0))%s;\n",
5023                     dstreg, dstmask, arg1, arg2, dstmask);
5024             break;
5025
5026         case WINED3D_TOP_MULTIPLY_ADD:
5027             shader_addline(buffer, "%s%s = clamp(%s%s * %s%s + %s%s, 0.0, 1.0);\n",
5028                     dstreg, dstmask, arg1, dstmask, arg2, dstmask, arg0, dstmask);
5029             break;
5030
5031         case WINED3D_TOP_LERP:
5032             /* MSDN isn't quite right here. */
5033             shader_addline(buffer, "%s%s = mix(%s%s, %s%s, %s%s);\n",
5034                     dstreg, dstmask, arg2, dstmask, arg1, dstmask, arg0, dstmask);
5035             break;
5036
5037         default:
5038             FIXME("Unhandled operation %#x.\n", op);
5039             break;
5040     }
5041 }
5042
5043 /* Context activation is done by the caller. */
5044 static GLuint shader_glsl_generate_ffp_fragment_shader(struct wined3d_shader_buffer *buffer,
5045         const struct ffp_frag_settings *settings, const struct wined3d_gl_info *gl_info)
5046 {
5047     BOOL tempreg_used = FALSE, tfactor_used = FALSE;
5048     BYTE lum_map = 0, bump_map = 0, tex_map = 0;
5049     const char *final_combiner_src = "ret";
5050     UINT lowest_disabled_stage;
5051     GLhandleARB shader_obj;
5052     DWORD arg0, arg1, arg2;
5053     unsigned int stage;
5054
5055     shader_buffer_clear(buffer);
5056
5057     /* Find out which textures are read */
5058     for (stage = 0; stage < MAX_TEXTURES; ++stage)
5059     {
5060         if (settings->op[stage].cop == WINED3D_TOP_DISABLE)
5061             break;
5062
5063         arg0 = settings->op[stage].carg0 & WINED3DTA_SELECTMASK;
5064         arg1 = settings->op[stage].carg1 & WINED3DTA_SELECTMASK;
5065         arg2 = settings->op[stage].carg2 & WINED3DTA_SELECTMASK;
5066
5067         if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE)
5068             tex_map |= 1 << stage;
5069         if (arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR)
5070             tfactor_used = TRUE;
5071         if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP)
5072             tempreg_used = TRUE;
5073         if (settings->op[stage].dst == tempreg)
5074             tempreg_used = TRUE;
5075
5076         switch (settings->op[stage].cop)
5077         {
5078             case WINED3D_TOP_BUMPENVMAP_LUMINANCE:
5079                 lum_map |= 1 << stage;
5080                 /* fall through */
5081             case WINED3D_TOP_BUMPENVMAP:
5082                 bump_map |= 1 << stage;
5083                 /* fall through */
5084             case WINED3D_TOP_BLEND_TEXTURE_ALPHA:
5085             case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM:
5086                 tex_map |= 1 << stage;
5087                 break;
5088
5089             case WINED3D_TOP_BLEND_FACTOR_ALPHA:
5090                 tfactor_used = TRUE;
5091                 break;
5092
5093             default:
5094                 break;
5095         }
5096
5097         if (settings->op[stage].aop == WINED3D_TOP_DISABLE)
5098             continue;
5099
5100         arg0 = settings->op[stage].aarg0 & WINED3DTA_SELECTMASK;
5101         arg1 = settings->op[stage].aarg1 & WINED3DTA_SELECTMASK;
5102         arg2 = settings->op[stage].aarg2 & WINED3DTA_SELECTMASK;
5103
5104         if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE)
5105             tex_map |= 1 << stage;
5106         if (arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR)
5107             tfactor_used = TRUE;
5108         if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP)
5109             tempreg_used = TRUE;
5110     }
5111     lowest_disabled_stage = stage;
5112
5113     shader_addline(buffer, "#version 120\n");
5114
5115     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
5116         shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n");
5117
5118     shader_addline(buffer, "vec4 tmp0, tmp1;\n");
5119     shader_addline(buffer, "vec4 ret;\n");
5120     if (tempreg_used || settings->sRGB_write)
5121         shader_addline(buffer, "vec4 temp_reg;\n");
5122     shader_addline(buffer, "vec4 arg0, arg1, arg2;\n");
5123
5124     for (stage = 0; stage < MAX_TEXTURES; ++stage)
5125     {
5126         if (!(tex_map & (1 << stage)))
5127             continue;
5128
5129         switch (settings->op[stage].tex_type)
5130         {
5131             case tex_1d:
5132                 shader_addline(buffer, "uniform sampler1D ps_sampler%u;\n", stage);
5133                 break;
5134             case tex_2d:
5135                 shader_addline(buffer, "uniform sampler2D ps_sampler%u;\n", stage);
5136                 break;
5137             case tex_3d:
5138                 shader_addline(buffer, "uniform sampler3D ps_sampler%u;\n", stage);
5139                 break;
5140             case tex_cube:
5141                 shader_addline(buffer, "uniform samplerCube ps_sampler%u;\n", stage);
5142                 break;
5143             case tex_rect:
5144                 shader_addline(buffer, "uniform sampler2DRect ps_sampler%u;\n", stage);
5145                 break;
5146             default:
5147                 FIXME("Unhandled sampler type %#x.\n", settings->op[stage].tex_type);
5148                 break;
5149         }
5150
5151         shader_addline(buffer, "vec4 tex%u;\n", stage);
5152
5153         if (!(bump_map & (1 << stage)))
5154             continue;
5155         shader_addline(buffer, "uniform mat2 bumpenv_mat%u;\n", stage);
5156
5157         if (!(lum_map & (1 << stage)))
5158             continue;
5159         shader_addline(buffer, "uniform float bumpenv_lum_scale%u;\n", stage);
5160         shader_addline(buffer, "uniform float bumpenv_lum_offset%u;\n", stage);
5161     }
5162     if (tfactor_used)
5163         shader_addline(buffer, "uniform vec4 tex_factor;\n");
5164     shader_addline(buffer, "uniform vec4 specular_enable;\n");
5165
5166     if (settings->sRGB_write)
5167     {
5168         shader_addline(buffer, "const vec4 srgb_const0 = vec4(%.8e, %.8e, %.8e, %.8e);\n",
5169                 srgb_pow, srgb_mul_high, srgb_sub_high, srgb_mul_low);
5170         shader_addline(buffer, "const vec4 srgb_const1 = vec4(%.8e, 0.0, 0.0, 0.0);\n",
5171                 srgb_cmp);
5172     }
5173
5174     shader_addline(buffer, "void main()\n{\n");
5175
5176     if (lowest_disabled_stage < 7 && settings->emul_clipplanes)
5177         shader_addline(buffer, "if (any(lessThan(gl_texCoord[7], vec4(0.0)))) discard;\n");
5178
5179     /* Generate texture sampling instructions) */
5180     for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage)
5181     {
5182         const char *texture_function, *coord_mask;
5183         char tex_reg_name[8];
5184         BOOL proj, clamp;
5185
5186         if (!(tex_map & (1 << stage)))
5187             continue;
5188
5189         if (settings->op[stage].projected == proj_none)
5190         {
5191             proj = FALSE;
5192         }
5193         else if (settings->op[stage].projected == proj_count4
5194                 || settings->op[stage].projected == proj_count3)
5195         {
5196             proj = TRUE;
5197         }
5198         else
5199         {
5200             FIXME("Unexpected projection mode %d\n", settings->op[stage].projected);
5201             proj = TRUE;
5202         }
5203
5204         if (settings->op[stage].cop == WINED3D_TOP_BUMPENVMAP
5205                 || settings->op[stage].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)
5206             clamp = FALSE;
5207         else
5208             clamp = TRUE;
5209
5210         switch (settings->op[stage].tex_type)
5211         {
5212             case tex_1d:
5213                 if (proj)
5214                 {
5215                     texture_function = "texture1DProj";
5216                     coord_mask = "xw";
5217                 }
5218                 else
5219                 {
5220                     texture_function = "texture1D";
5221                     coord_mask = "x";
5222                 }
5223                 break;
5224             case tex_2d:
5225                 if (proj)
5226                 {
5227                     texture_function = "texture2DProj";
5228                     coord_mask = "xyw";
5229                 }
5230                 else
5231                 {
5232                     texture_function = "texture2D";
5233                     coord_mask = "xy";
5234                 }
5235                 break;
5236             case tex_3d:
5237                 if (proj)
5238                 {
5239                     texture_function = "texture3DProj";
5240                     coord_mask = "xyzw";
5241                 }
5242                 else
5243                 {
5244                     texture_function = "texture3D";
5245                     coord_mask = "xyz";
5246                 }
5247                 break;
5248             case tex_cube:
5249                 texture_function = "textureCube";
5250                 coord_mask = "xyz";
5251                 break;
5252             case tex_rect:
5253                 if (proj)
5254                 {
5255                     texture_function = "texture2DRectProj";
5256                     coord_mask = "xyw";
5257                 }
5258                 else
5259                 {
5260                     texture_function = "texture2DRect";
5261                     coord_mask = "xy";
5262                 }
5263                 break;
5264             default:
5265                 FIXME("Unhandled texture type %#x.\n", settings->op[stage].tex_type);
5266                 texture_function = "";
5267                 coord_mask = "xyzw";
5268                 break;
5269         }
5270
5271         if (stage > 0
5272                 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP
5273                 || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE))
5274         {
5275             shader_addline(buffer, "ret.xy = bumpenv_mat%u * tex%u.xy;\n", stage - 1, stage - 1);
5276
5277             /* With projective textures, texbem only divides the static
5278              * texture coord, not the displacement, so multiply the
5279              * displacement with the dividing parameter before passing it to
5280              * TXP. */
5281             if (settings->op[stage].projected != proj_none)
5282             {
5283                 if (settings->op[stage].projected == proj_count4)
5284                 {
5285                     shader_addline(buffer, "ret.xy = (ret.xy * gl_TexCoord[%u].w) + gl_TexCoord[%u].xy;\n",
5286                             stage, stage);
5287                     shader_addline(buffer, "ret.zw = gl_TexCoord[%u].ww;\n", stage);
5288                 }
5289                 else
5290                 {
5291                     shader_addline(buffer, "ret.xy = (ret.xy * gl_TexCoord[%u].z) + gl_TexCoord[%u].xy;\n",
5292                             stage, stage);
5293                     shader_addline(buffer, "ret.zw = gl_TexCoord[%u].zz;\n", stage);
5294                 }
5295             }
5296             else
5297             {
5298                 shader_addline(buffer, "ret = gl_TexCoord[%u] + ret.xyxy;\n", stage);
5299             }
5300
5301             if (clamp)
5302                 shader_addline(buffer, "tex%u = clamp(%s(ps_sampler%u, ret.%s), 0.0, 1.0);\n",
5303                         stage, texture_function, stage, coord_mask);
5304             else
5305                 shader_addline(buffer, "tex%u = %s(ps_sampler%u, ret.%s);\n",
5306                         stage, texture_function, stage, coord_mask);
5307
5308             if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)
5309                 shader_addline(buffer, "tex%u *= clamp(tex%u.z * bumpenv_lum_scale%u + bumpenv_lum_offset%u, 0.0, 1.0);\n",
5310                         stage, stage - 1, stage - 1, stage - 1);
5311         }
5312         else if (settings->op[stage].projected == proj_count3)
5313         {
5314             if (clamp)
5315                 shader_addline(buffer, "tex%u = clamp(%s(ps_sampler%u, gl_TexCoord[%u].xyz), 0.0, 1.0);\n",
5316                         stage, texture_function, stage, stage);
5317             else
5318                 shader_addline(buffer, "tex%u = %s(ps_sampler%u, gl_TexCoord[%u].xyz);\n",
5319                         stage, texture_function, stage, stage);
5320         }
5321         else
5322         {
5323             if (clamp)
5324                 shader_addline(buffer, "tex%u = clamp(%s(ps_sampler%u, gl_TexCoord[%u].%s), 0.0, 1.0);\n",
5325                         stage, texture_function, stage, stage, coord_mask);
5326             else
5327                 shader_addline(buffer, "tex%u = %s(ps_sampler%u, gl_TexCoord[%u].%s);\n",
5328                         stage, texture_function, stage, stage, coord_mask);
5329         }
5330
5331         sprintf(tex_reg_name, "tex%u", stage);
5332         shader_glsl_color_correction_ext(buffer, tex_reg_name, WINED3DSP_WRITEMASK_ALL,
5333                 settings->op[stage].color_fixup);
5334     }
5335
5336     /* Generate the main shader */
5337     for (stage = 0; stage < MAX_TEXTURES; ++stage)
5338     {
5339         BOOL op_equal;
5340
5341         if (settings->op[stage].cop == WINED3D_TOP_DISABLE)
5342         {
5343             if (!stage)
5344                 final_combiner_src = "gl_Color";
5345             break;
5346         }
5347
5348         if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1
5349                 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1)
5350             op_equal = settings->op[stage].carg1 == settings->op[stage].aarg1;
5351         else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1
5352                 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2)
5353             op_equal = settings->op[stage].carg1 == settings->op[stage].aarg2;
5354         else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2
5355                 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1)
5356             op_equal = settings->op[stage].carg2 == settings->op[stage].aarg1;
5357         else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2
5358                 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2)
5359             op_equal = settings->op[stage].carg2 == settings->op[stage].aarg2;
5360         else
5361             op_equal = settings->op[stage].aop == settings->op[stage].cop
5362                     && settings->op[stage].carg0 == settings->op[stage].aarg0
5363                     && settings->op[stage].carg1 == settings->op[stage].aarg1
5364                     && settings->op[stage].carg2 == settings->op[stage].aarg2;
5365
5366         if (settings->op[stage].aop == WINED3D_TOP_DISABLE)
5367         {
5368             shader_glsl_ffp_fragment_op(buffer, stage, TRUE, FALSE, settings->op[stage].dst,
5369                     settings->op[stage].cop, settings->op[stage].carg0,
5370                     settings->op[stage].carg1, settings->op[stage].carg2);
5371             if (!stage)
5372                 shader_addline(buffer, "ret.w = gl_Color.w;\n");
5373         }
5374         else if (op_equal)
5375         {
5376             shader_glsl_ffp_fragment_op(buffer, stage, TRUE, TRUE, settings->op[stage].dst,
5377                     settings->op[stage].cop, settings->op[stage].carg0,
5378                     settings->op[stage].carg1, settings->op[stage].carg2);
5379         }
5380         else
5381         {
5382             shader_glsl_ffp_fragment_op(buffer, stage, TRUE, FALSE, settings->op[stage].dst,
5383                     settings->op[stage].cop, settings->op[stage].carg0,
5384                     settings->op[stage].carg1, settings->op[stage].carg2);
5385             shader_glsl_ffp_fragment_op(buffer, stage, FALSE, TRUE, settings->op[stage].dst,
5386                     settings->op[stage].aop, settings->op[stage].aarg0,
5387                     settings->op[stage].aarg1, settings->op[stage].aarg2);
5388         }
5389     }
5390
5391     shader_addline(buffer, "gl_FragData[0] = gl_SecondaryColor * specular_enable + %s;\n", final_combiner_src);
5392
5393     if (settings->sRGB_write)
5394         shader_glsl_generate_srgb_write_correction(buffer);
5395
5396     shader_glsl_generate_fog_code(buffer, settings->fog);
5397
5398     shader_addline(buffer, "}\n");
5399
5400     shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
5401     shader_glsl_compile(gl_info, shader_obj, buffer->buffer);
5402     return shader_obj;
5403 }
5404
5405 static struct glsl_ffp_fragment_shader *shader_glsl_find_ffp_fragment_shader(struct shader_glsl_priv *priv,
5406         const struct wined3d_gl_info *gl_info, const struct ffp_frag_settings *args)
5407 {
5408     struct glsl_ffp_fragment_shader *glsl_desc;
5409     const struct ffp_frag_desc *desc;
5410
5411     if ((desc = find_ffp_frag_shader(&priv->ffp_fragment_shaders, args)))
5412         return CONTAINING_RECORD(desc, struct glsl_ffp_fragment_shader, entry);
5413
5414     if (!(glsl_desc = HeapAlloc(GetProcessHeap(), 0, sizeof(*glsl_desc))))
5415         return NULL;
5416
5417     glsl_desc->entry.settings = *args;
5418     glsl_desc->id = shader_glsl_generate_ffp_fragment_shader(&priv->shader_buffer, args, gl_info);
5419     list_init(&glsl_desc->linked_programs);
5420     add_ffp_frag_shader(&priv->ffp_fragment_shaders, &glsl_desc->entry);
5421
5422     return glsl_desc;
5423 }
5424
5425
5426 static void shader_glsl_init_vs_uniform_locations(const struct wined3d_gl_info *gl_info,
5427         GLhandleARB program_id, struct glsl_vs_program *vs)
5428 {
5429     unsigned int i;
5430     char name[32];
5431
5432     vs->uniform_f_locations = HeapAlloc(GetProcessHeap(), 0,
5433             sizeof(GLhandleARB) * gl_info->limits.glsl_vs_float_constants);
5434     for (i = 0; i < gl_info->limits.glsl_vs_float_constants; ++i)
5435     {
5436         snprintf(name, sizeof(name), "vs_c[%u]", i);
5437         vs->uniform_f_locations[i] = GL_EXTCALL(glGetUniformLocationARB(program_id, name));
5438     }
5439
5440     for (i = 0; i < MAX_CONST_I; ++i)
5441     {
5442         snprintf(name, sizeof(name), "vs_i[%u]", i);
5443         vs->uniform_i_locations[i] = GL_EXTCALL(glGetUniformLocationARB(program_id, name));
5444     }
5445
5446     vs->pos_fixup_location = GL_EXTCALL(glGetUniformLocationARB(program_id, "posFixup"));
5447 }
5448
5449 static void shader_glsl_init_ps_uniform_locations(const struct wined3d_gl_info *gl_info,
5450         GLhandleARB program_id, struct glsl_ps_program *ps)
5451 {
5452     unsigned int i;
5453     char name[32];
5454
5455     ps->uniform_f_locations = HeapAlloc(GetProcessHeap(), 0,
5456             sizeof(GLhandleARB) * gl_info->limits.glsl_ps_float_constants);
5457     for (i = 0; i < gl_info->limits.glsl_ps_float_constants; ++i)
5458     {
5459         snprintf(name, sizeof(name), "ps_c[%u]", i);
5460         ps->uniform_f_locations[i] = GL_EXTCALL(glGetUniformLocationARB(program_id, name));
5461     }
5462
5463     for (i = 0; i < MAX_CONST_I; ++i)
5464     {
5465         snprintf(name, sizeof(name), "ps_i[%u]", i);
5466         ps->uniform_i_locations[i] = GL_EXTCALL(glGetUniformLocationARB(program_id, name));
5467     }
5468
5469     for (i = 0; i < MAX_TEXTURES; ++i)
5470     {
5471         snprintf(name, sizeof(name), "bumpenv_mat%u", i);
5472         ps->bumpenv_mat_location[i] = GL_EXTCALL(glGetUniformLocationARB(program_id, name));
5473         snprintf(name, sizeof(name), "bumpenv_lum_scale%u", i);
5474         ps->bumpenv_lum_scale_location[i] = GL_EXTCALL(glGetUniformLocationARB(program_id, name));
5475         snprintf(name, sizeof(name), "bumpenv_lum_offset%u", i);
5476         ps->bumpenv_lum_offset_location[i] = GL_EXTCALL(glGetUniformLocationARB(program_id, name));
5477     }
5478
5479     ps->tex_factor_location = GL_EXTCALL(glGetUniformLocationARB(program_id, "tex_factor"));
5480     ps->specular_enable_location = GL_EXTCALL(glGetUniformLocationARB(program_id, "specular_enable"));
5481     ps->np2_fixup_location = GL_EXTCALL(glGetUniformLocationARB(program_id, "ps_samplerNP2Fixup"));
5482     ps->ycorrection_location = GL_EXTCALL(glGetUniformLocationARB(program_id, "ycorrection"));
5483 }
5484
5485 /* Context activation is done by the caller. */
5486 static void set_glsl_shader_program(const struct wined3d_context *context, struct wined3d_device *device,
5487         enum wined3d_shader_mode vertex_mode, enum wined3d_shader_mode fragment_mode)
5488 {
5489     const struct wined3d_state *state = &device->stateBlock->state;
5490     const struct wined3d_gl_info *gl_info = context->gl_info;
5491     const struct ps_np2fixup_info *np2fixup_info = NULL;
5492     struct shader_glsl_priv *priv = device->shader_priv;
5493     struct glsl_shader_prog_link *entry = NULL;
5494     struct wined3d_shader *vshader = NULL;
5495     struct wined3d_shader *gshader = NULL;
5496     struct wined3d_shader *pshader = NULL;
5497     GLhandleARB programId                  = 0;
5498     GLhandleARB reorder_shader_id          = 0;
5499     unsigned int i;
5500     struct ps_compile_args ps_compile_args;
5501     struct vs_compile_args vs_compile_args;
5502     GLhandleARB vs_id, gs_id, ps_id;
5503     struct list *ps_list;
5504
5505     if (vertex_mode == WINED3D_SHADER_MODE_SHADER)
5506     {
5507         vshader = state->vertex_shader;
5508         find_vs_compile_args(state, vshader, &vs_compile_args);
5509         vs_id = find_glsl_vshader(context, &priv->shader_buffer, vshader, &vs_compile_args);
5510
5511         if ((gshader = state->geometry_shader))
5512             gs_id = find_glsl_geometry_shader(context, &priv->shader_buffer, gshader);
5513         else
5514             gs_id = 0;
5515     }
5516     else
5517     {
5518         vs_id = 0;
5519         gs_id = 0;
5520     }
5521
5522     if (fragment_mode == WINED3D_SHADER_MODE_SHADER)
5523     {
5524         pshader = state->pixel_shader;
5525         find_ps_compile_args(state, pshader, &ps_compile_args);
5526         ps_id = find_glsl_pshader(context, &priv->shader_buffer,
5527                 pshader, &ps_compile_args, &np2fixup_info);
5528         ps_list = &pshader->linked_programs;
5529     }
5530     else if (fragment_mode == WINED3D_SHADER_MODE_FFP && priv->fragment_pipe == &glsl_fragment_pipe)
5531     {
5532         struct glsl_ffp_fragment_shader *ffp_shader;
5533         struct ffp_frag_settings settings;
5534
5535         gen_ffp_frag_op(device, state, &settings, FALSE);
5536         ffp_shader = shader_glsl_find_ffp_fragment_shader(priv, gl_info, &settings);
5537         ps_id = ffp_shader->id;
5538         ps_list = &ffp_shader->linked_programs;
5539     }
5540     else
5541     {
5542         ps_id = 0;
5543     }
5544
5545     if ((!vs_id && !gs_id && !ps_id) || (entry = get_glsl_program_entry(priv, vs_id, gs_id, ps_id)))
5546     {
5547         priv->glsl_program = entry;
5548         return;
5549     }
5550
5551     /* If we get to this point, then no matching program exists, so we create one */
5552     programId = GL_EXTCALL(glCreateProgramObjectARB());
5553     TRACE("Created new GLSL shader program %u\n", programId);
5554
5555     /* Create the entry */
5556     entry = HeapAlloc(GetProcessHeap(), 0, sizeof(struct glsl_shader_prog_link));
5557     entry->programId = programId;
5558     entry->vs.id = vs_id;
5559     entry->gs.id = gs_id;
5560     entry->ps.id = ps_id;
5561     entry->constant_version = 0;
5562     entry->ps.np2_fixup_info = np2fixup_info;
5563     /* Add the hash table entry */
5564     add_glsl_program_entry(priv, entry);
5565
5566     /* Set the current program */
5567     priv->glsl_program = entry;
5568
5569     /* Attach GLSL vshader */
5570     if (vshader)
5571     {
5572         WORD map = vshader->reg_maps.input_registers;
5573         char tmp_name[10];
5574
5575         reorder_shader_id = generate_param_reorder_function(&priv->shader_buffer, vshader, pshader, gl_info);
5576         TRACE("Attaching GLSL shader object %u to program %u\n", reorder_shader_id, programId);
5577         GL_EXTCALL(glAttachObjectARB(programId, reorder_shader_id));
5578         checkGLcall("glAttachObjectARB");
5579         /* Flag the reorder function for deletion, then it will be freed automatically when the program
5580          * is destroyed
5581          */
5582         GL_EXTCALL(glDeleteObjectARB(reorder_shader_id));
5583
5584         TRACE("Attaching GLSL shader object %u to program %u.\n", vs_id, programId);
5585         GL_EXTCALL(glAttachObjectARB(programId, vs_id));
5586         checkGLcall("glAttachObjectARB");
5587
5588         /* Bind vertex attributes to a corresponding index number to match
5589          * the same index numbers as ARB_vertex_programs (makes loading
5590          * vertex attributes simpler).  With this method, we can use the
5591          * exact same code to load the attributes later for both ARB and
5592          * GLSL shaders.
5593          *
5594          * We have to do this here because we need to know the Program ID
5595          * in order to make the bindings work, and it has to be done prior
5596          * to linking the GLSL program. */
5597         for (i = 0; map; map >>= 1, ++i)
5598         {
5599             if (!(map & 1)) continue;
5600
5601             snprintf(tmp_name, sizeof(tmp_name), "vs_in%u", i);
5602             GL_EXTCALL(glBindAttribLocationARB(programId, i, tmp_name));
5603         }
5604         checkGLcall("glBindAttribLocationARB");
5605
5606         list_add_head(&vshader->linked_programs, &entry->vs.shader_entry);
5607     }
5608
5609     if (gshader)
5610     {
5611         TRACE("Attaching GLSL geometry shader object %u to program %u.\n", gs_id, programId);
5612         GL_EXTCALL(glAttachObjectARB(programId, gs_id));
5613         checkGLcall("glAttachObjectARB");
5614
5615         TRACE("input type %s, output type %s, vertices out %u.\n",
5616                 debug_d3dprimitivetype(gshader->u.gs.input_type),
5617                 debug_d3dprimitivetype(gshader->u.gs.output_type),
5618                 gshader->u.gs.vertices_out);
5619         GL_EXTCALL(glProgramParameteriARB(programId, GL_GEOMETRY_INPUT_TYPE_ARB,
5620                 gl_primitive_type_from_d3d(gshader->u.gs.input_type)));
5621         GL_EXTCALL(glProgramParameteriARB(programId, GL_GEOMETRY_OUTPUT_TYPE_ARB,
5622                 gl_primitive_type_from_d3d(gshader->u.gs.output_type)));
5623         GL_EXTCALL(glProgramParameteriARB(programId, GL_GEOMETRY_VERTICES_OUT_ARB,
5624                 gshader->u.gs.vertices_out));
5625         checkGLcall("glProgramParameteriARB");
5626
5627         list_add_head(&gshader->linked_programs, &entry->gs.shader_entry);
5628     }
5629
5630     /* Attach GLSL pshader */
5631     if (ps_id)
5632     {
5633         TRACE("Attaching GLSL shader object %u to program %u.\n", ps_id, programId);
5634         GL_EXTCALL(glAttachObjectARB(programId, ps_id));
5635         checkGLcall("glAttachObjectARB");
5636
5637         list_add_head(ps_list, &entry->ps.shader_entry);
5638     }
5639
5640     /* Link the program */
5641     TRACE("Linking GLSL shader program %u\n", programId);
5642     GL_EXTCALL(glLinkProgramARB(programId));
5643     shader_glsl_validate_link(gl_info, programId);
5644
5645     shader_glsl_init_vs_uniform_locations(gl_info, programId, &entry->vs);
5646     shader_glsl_init_ps_uniform_locations(gl_info, programId, &entry->ps);
5647     checkGLcall("Find glsl program uniform locations");
5648
5649     if (pshader && pshader->reg_maps.shader_version.major >= 3
5650             && pshader->u.ps.declared_in_count > vec4_varyings(3, gl_info))
5651     {
5652         TRACE("Shader %d needs vertex color clamping disabled\n", programId);
5653         entry->vs.vertex_color_clamp = GL_FALSE;
5654     }
5655     else
5656     {
5657         entry->vs.vertex_color_clamp = GL_FIXED_ONLY_ARB;
5658     }
5659
5660     /* Set the shader to allow uniform loading on it */
5661     GL_EXTCALL(glUseProgramObjectARB(programId));
5662     checkGLcall("glUseProgramObjectARB(programId)");
5663
5664     /* Load the vertex and pixel samplers now. The function that finds the mappings makes sure
5665      * that it stays the same for each vertexshader-pixelshader pair(=linked glsl program). If
5666      * a pshader with fixed function pipeline is used there are no vertex samplers, and if a
5667      * vertex shader with fixed function pixel processing is used we make sure that the card
5668      * supports enough samplers to allow the max number of vertex samplers with all possible
5669      * fixed function fragment processing setups. So once the program is linked these samplers
5670      * won't change.
5671      */
5672     shader_glsl_load_vsamplers(gl_info, device->texUnitMap, programId);
5673     shader_glsl_load_psamplers(gl_info, device->texUnitMap, programId);
5674 }
5675
5676 /* Context activation is done by the caller. */
5677 static GLhandleARB create_glsl_blt_shader(const struct wined3d_gl_info *gl_info, enum tex_types tex_type, BOOL masked)
5678 {
5679     GLhandleARB program_id;
5680     GLhandleARB vshader_id, pshader_id;
5681     const char *blt_pshader;
5682
5683     static const char *blt_vshader =
5684         "#version 120\n"
5685         "void main(void)\n"
5686         "{\n"
5687         "    gl_Position = gl_Vertex;\n"
5688         "    gl_FrontColor = vec4(1.0);\n"
5689         "    gl_TexCoord[0] = gl_MultiTexCoord0;\n"
5690         "}\n";
5691
5692     static const char * const blt_pshaders_full[tex_type_count] =
5693     {
5694         /* tex_1d */
5695         NULL,
5696         /* tex_2d */
5697         "#version 120\n"
5698         "uniform sampler2D sampler;\n"
5699         "void main(void)\n"
5700         "{\n"
5701         "    gl_FragDepth = texture2D(sampler, gl_TexCoord[0].xy).x;\n"
5702         "}\n",
5703         /* tex_3d */
5704         NULL,
5705         /* tex_cube */
5706         "#version 120\n"
5707         "uniform samplerCube sampler;\n"
5708         "void main(void)\n"
5709         "{\n"
5710         "    gl_FragDepth = textureCube(sampler, gl_TexCoord[0].xyz).x;\n"
5711         "}\n",
5712         /* tex_rect */
5713         "#version 120\n"
5714         "#extension GL_ARB_texture_rectangle : enable\n"
5715         "uniform sampler2DRect sampler;\n"
5716         "void main(void)\n"
5717         "{\n"
5718         "    gl_FragDepth = texture2DRect(sampler, gl_TexCoord[0].xy).x;\n"
5719         "}\n",
5720     };
5721
5722     static const char * const blt_pshaders_masked[tex_type_count] =
5723     {
5724         /* tex_1d */
5725         NULL,
5726         /* tex_2d */
5727         "#version 120\n"
5728         "uniform sampler2D sampler;\n"
5729         "uniform vec4 mask;\n"
5730         "void main(void)\n"
5731         "{\n"
5732         "    if (all(lessThan(gl_FragCoord.xy, mask.zw))) discard;\n"
5733         "    gl_FragDepth = texture2D(sampler, gl_TexCoord[0].xy).x;\n"
5734         "}\n",
5735         /* tex_3d */
5736         NULL,
5737         /* tex_cube */
5738         "#version 120\n"
5739         "uniform samplerCube sampler;\n"
5740         "uniform vec4 mask;\n"
5741         "void main(void)\n"
5742         "{\n"
5743         "    if (all(lessThan(gl_FragCoord.xy, mask.zw))) discard;\n"
5744         "    gl_FragDepth = textureCube(sampler, gl_TexCoord[0].xyz).x;\n"
5745         "}\n",
5746         /* tex_rect */
5747         "#version 120\n"
5748         "#extension GL_ARB_texture_rectangle : enable\n"
5749         "uniform sampler2DRect sampler;\n"
5750         "uniform vec4 mask;\n"
5751         "void main(void)\n"
5752         "{\n"
5753         "    if (all(lessThan(gl_FragCoord.xy, mask.zw))) discard;\n"
5754         "    gl_FragDepth = texture2DRect(sampler, gl_TexCoord[0].xy).x;\n"
5755         "}\n",
5756     };
5757
5758     blt_pshader = masked ? blt_pshaders_masked[tex_type] : blt_pshaders_full[tex_type];
5759     if (!blt_pshader)
5760     {
5761         FIXME("tex_type %#x not supported\n", tex_type);
5762         return 0;
5763     }
5764
5765     vshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
5766     shader_glsl_compile(gl_info, vshader_id, blt_vshader);
5767
5768     pshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
5769     shader_glsl_compile(gl_info, pshader_id, blt_pshader);
5770
5771     program_id = GL_EXTCALL(glCreateProgramObjectARB());
5772     GL_EXTCALL(glAttachObjectARB(program_id, vshader_id));
5773     GL_EXTCALL(glAttachObjectARB(program_id, pshader_id));
5774     GL_EXTCALL(glLinkProgramARB(program_id));
5775
5776     shader_glsl_validate_link(gl_info, program_id);
5777
5778     /* Once linked we can mark the shaders for deletion. They will be deleted once the program
5779      * is destroyed
5780      */
5781     GL_EXTCALL(glDeleteObjectARB(vshader_id));
5782     GL_EXTCALL(glDeleteObjectARB(pshader_id));
5783     return program_id;
5784 }
5785
5786 /* Context activation is done by the caller. */
5787 static void shader_glsl_select(const struct wined3d_context *context, enum wined3d_shader_mode vertex_mode,
5788         enum wined3d_shader_mode fragment_mode)
5789 {
5790     const struct wined3d_gl_info *gl_info = context->gl_info;
5791     struct wined3d_device *device = context->swapchain->device;
5792     struct shader_glsl_priv *priv = device->shader_priv;
5793     GLhandleARB program_id = 0;
5794     GLenum old_vertex_color_clamp, current_vertex_color_clamp;
5795
5796     priv->vertex_pipe->vp_enable(gl_info, vertex_mode == WINED3D_SHADER_MODE_FFP);
5797     priv->fragment_pipe->enable_extension(gl_info, fragment_mode == WINED3D_SHADER_MODE_FFP);
5798
5799     old_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vs.vertex_color_clamp : GL_FIXED_ONLY_ARB;
5800     set_glsl_shader_program(context, device, vertex_mode, fragment_mode);
5801     current_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vs.vertex_color_clamp : GL_FIXED_ONLY_ARB;
5802     if (old_vertex_color_clamp != current_vertex_color_clamp)
5803     {
5804         if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT])
5805         {
5806             GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, current_vertex_color_clamp));
5807             checkGLcall("glClampColorARB");
5808         }
5809         else
5810         {
5811             FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
5812         }
5813     }
5814
5815     program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
5816     if (program_id) TRACE("Using GLSL program %u\n", program_id);
5817     GL_EXTCALL(glUseProgramObjectARB(program_id));
5818     checkGLcall("glUseProgramObjectARB");
5819
5820     /* In case that NP2 texcoord fixup data is found for the selected program, trigger a reload of the
5821      * constants. This has to be done because it can't be guaranteed that sampler() (from state.c) is
5822      * called between selecting the shader and using it, which results in wrong fixup for some frames. */
5823     if (priv->glsl_program && priv->glsl_program->ps.np2_fixup_info)
5824     {
5825         shader_glsl_load_np2fixup_constants(priv, gl_info, &device->stateBlock->state);
5826     }
5827 }
5828
5829 /* Context activation is done by the caller. */
5830 static void shader_glsl_select_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info,
5831         enum tex_types tex_type, const SIZE *ds_mask_size)
5832 {
5833     BOOL masked = ds_mask_size->cx && ds_mask_size->cy;
5834     struct shader_glsl_priv *priv = shader_priv;
5835     GLhandleARB *blt_program;
5836     GLint loc;
5837
5838     blt_program = masked ? &priv->depth_blt_program_masked[tex_type] : &priv->depth_blt_program_full[tex_type];
5839     if (!*blt_program)
5840     {
5841         *blt_program = create_glsl_blt_shader(gl_info, tex_type, masked);
5842         loc = GL_EXTCALL(glGetUniformLocationARB(*blt_program, "sampler"));
5843         GL_EXTCALL(glUseProgramObjectARB(*blt_program));
5844         GL_EXTCALL(glUniform1iARB(loc, 0));
5845     }
5846     else
5847     {
5848         GL_EXTCALL(glUseProgramObjectARB(*blt_program));
5849     }
5850
5851     if (masked)
5852     {
5853         loc = GL_EXTCALL(glGetUniformLocationARB(*blt_program, "mask"));
5854         GL_EXTCALL(glUniform4fARB(loc, 0.0f, 0.0f, (float)ds_mask_size->cx, (float)ds_mask_size->cy));
5855     }
5856 }
5857
5858 /* Context activation is done by the caller. */
5859 static void shader_glsl_deselect_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info)
5860 {
5861     struct shader_glsl_priv *priv = shader_priv;
5862     GLhandleARB program_id;
5863
5864     program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
5865     if (program_id) TRACE("Using GLSL program %u\n", program_id);
5866
5867     GL_EXTCALL(glUseProgramObjectARB(program_id));
5868     checkGLcall("glUseProgramObjectARB");
5869 }
5870
5871 static void shader_glsl_destroy(struct wined3d_shader *shader)
5872 {
5873     struct glsl_shader_private *shader_data = shader->backend_data;
5874     struct wined3d_device *device = shader->device;
5875     struct shader_glsl_priv *priv = device->shader_priv;
5876     const struct wined3d_gl_info *gl_info;
5877     const struct list *linked_programs;
5878     struct wined3d_context *context;
5879
5880     if (!shader_data || !shader_data->num_gl_shaders)
5881     {
5882         HeapFree(GetProcessHeap(), 0, shader_data);
5883         shader->backend_data = NULL;
5884         return;
5885     }
5886
5887     context = context_acquire(device, NULL);
5888     gl_info = context->gl_info;
5889
5890     TRACE("Deleting linked programs.\n");
5891     linked_programs = &shader->linked_programs;
5892     if (linked_programs->next)
5893     {
5894         struct glsl_shader_prog_link *entry, *entry2;
5895         UINT i;
5896
5897         switch (shader->reg_maps.shader_version.type)
5898         {
5899             case WINED3D_SHADER_TYPE_PIXEL:
5900             {
5901                 struct glsl_ps_compiled_shader *gl_shaders = shader_data->gl_shaders.ps;
5902
5903                 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs,
5904                         struct glsl_shader_prog_link, ps.shader_entry)
5905                 {
5906                     delete_glsl_program_entry(priv, gl_info, entry);
5907                 }
5908
5909                 for (i = 0; i < shader_data->num_gl_shaders; ++i)
5910                 {
5911                     TRACE("Deleting pixel shader %u.\n", gl_shaders[i].prgId);
5912                     if (priv->glsl_program && priv->glsl_program->ps.id == gl_shaders[i].prgId)
5913                         shader_glsl_select(context, WINED3D_SHADER_MODE_NONE, WINED3D_SHADER_MODE_NONE);
5914                     GL_EXTCALL(glDeleteObjectARB(gl_shaders[i].prgId));
5915                     checkGLcall("glDeleteObjectARB");
5916                 }
5917                 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders.ps);
5918
5919                 break;
5920             }
5921
5922             case WINED3D_SHADER_TYPE_VERTEX:
5923             {
5924                 struct glsl_vs_compiled_shader *gl_shaders = shader_data->gl_shaders.vs;
5925
5926                 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs,
5927                         struct glsl_shader_prog_link, vs.shader_entry)
5928                 {
5929                     delete_glsl_program_entry(priv, gl_info, entry);
5930                 }
5931
5932                 for (i = 0; i < shader_data->num_gl_shaders; ++i)
5933                 {
5934                     TRACE("Deleting vertex shader %u.\n", gl_shaders[i].prgId);
5935                     if (priv->glsl_program && priv->glsl_program->vs.id == gl_shaders[i].prgId)
5936                         shader_glsl_select(context, WINED3D_SHADER_MODE_NONE, WINED3D_SHADER_MODE_NONE);
5937                     GL_EXTCALL(glDeleteObjectARB(gl_shaders[i].prgId));
5938                     checkGLcall("glDeleteObjectARB");
5939                 }
5940                 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders.vs);
5941
5942                 break;
5943             }
5944
5945             case WINED3D_SHADER_TYPE_GEOMETRY:
5946             {
5947                 struct glsl_gs_compiled_shader *gl_shaders = shader_data->gl_shaders.gs;
5948
5949                 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs,
5950                         struct glsl_shader_prog_link, gs.shader_entry)
5951                 {
5952                     delete_glsl_program_entry(priv, gl_info, entry);
5953                 }
5954
5955                 for (i = 0; i < shader_data->num_gl_shaders; ++i)
5956                 {
5957                     TRACE("Deleting geometry shader %u.\n", gl_shaders[i].id);
5958                     if (priv->glsl_program && priv->glsl_program->gs.id == gl_shaders[i].id)
5959                         shader_glsl_select(context, WINED3D_SHADER_MODE_NONE, WINED3D_SHADER_MODE_NONE);
5960                     GL_EXTCALL(glDeleteObjectARB(gl_shaders[i].id));
5961                     checkGLcall("glDeleteObjectARB");
5962                 }
5963                 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders.gs);
5964
5965                 break;
5966             }
5967
5968             default:
5969                 ERR("Unhandled shader type %#x.\n", shader->reg_maps.shader_version.type);
5970                 break;
5971         }
5972     }
5973
5974     HeapFree(GetProcessHeap(), 0, shader->backend_data);
5975     shader->backend_data = NULL;
5976
5977     context_release(context);
5978 }
5979
5980 static int glsl_program_key_compare(const void *key, const struct wine_rb_entry *entry)
5981 {
5982     const struct glsl_program_key *k = key;
5983     const struct glsl_shader_prog_link *prog = WINE_RB_ENTRY_VALUE(entry,
5984             const struct glsl_shader_prog_link, program_lookup_entry);
5985
5986     if (k->vs_id > prog->vs.id) return 1;
5987     else if (k->vs_id < prog->vs.id) return -1;
5988
5989     if (k->gs_id > prog->gs.id) return 1;
5990     else if (k->gs_id < prog->gs.id) return -1;
5991
5992     if (k->ps_id > prog->ps.id) return 1;
5993     else if (k->ps_id < prog->ps.id) return -1;
5994
5995     return 0;
5996 }
5997
5998 static BOOL constant_heap_init(struct constant_heap *heap, unsigned int constant_count)
5999 {
6000     SIZE_T size = (constant_count + 1) * sizeof(*heap->entries) + constant_count * sizeof(*heap->positions);
6001     void *mem = HeapAlloc(GetProcessHeap(), 0, size);
6002
6003     if (!mem)
6004     {
6005         ERR("Failed to allocate memory\n");
6006         return FALSE;
6007     }
6008
6009     heap->entries = mem;
6010     heap->entries[1].version = 0;
6011     heap->positions = (unsigned int *)(heap->entries + constant_count + 1);
6012     heap->size = 1;
6013
6014     return TRUE;
6015 }
6016
6017 static void constant_heap_free(struct constant_heap *heap)
6018 {
6019     HeapFree(GetProcessHeap(), 0, heap->entries);
6020 }
6021
6022 static const struct wine_rb_functions wined3d_glsl_program_rb_functions =
6023 {
6024     wined3d_rb_alloc,
6025     wined3d_rb_realloc,
6026     wined3d_rb_free,
6027     glsl_program_key_compare,
6028 };
6029
6030 static HRESULT shader_glsl_alloc(struct wined3d_device *device, const struct wined3d_vertex_pipe_ops *vertex_pipe,
6031         const struct fragment_pipeline *fragment_pipe)
6032 {
6033     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6034     struct shader_glsl_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_glsl_priv));
6035     SIZE_T stack_size = wined3d_log2i(max(gl_info->limits.glsl_vs_float_constants,
6036             gl_info->limits.glsl_ps_float_constants)) + 1;
6037     struct fragment_caps fragment_caps;
6038     void *vertex_priv, *fragment_priv;
6039
6040     if (!(vertex_priv = vertex_pipe->vp_alloc(&glsl_shader_backend, priv)))
6041     {
6042         ERR("Failed to initialize vertex pipe.\n");
6043         HeapFree(GetProcessHeap(), 0, priv);
6044         return E_FAIL;
6045     }
6046
6047     if (!(fragment_priv = fragment_pipe->alloc_private(&glsl_shader_backend, priv)))
6048     {
6049         ERR("Failed to initialize fragment pipe.\n");
6050         vertex_pipe->vp_free(device);
6051         HeapFree(GetProcessHeap(), 0, priv);
6052         return E_FAIL;
6053     }
6054
6055     if (!shader_buffer_init(&priv->shader_buffer))
6056     {
6057         ERR("Failed to initialize shader buffer.\n");
6058         goto fail;
6059     }
6060
6061     priv->stack = HeapAlloc(GetProcessHeap(), 0, stack_size * sizeof(*priv->stack));
6062     if (!priv->stack)
6063     {
6064         ERR("Failed to allocate memory.\n");
6065         goto fail;
6066     }
6067
6068     if (!constant_heap_init(&priv->vconst_heap, gl_info->limits.glsl_vs_float_constants))
6069     {
6070         ERR("Failed to initialize vertex shader constant heap\n");
6071         goto fail;
6072     }
6073
6074     if (!constant_heap_init(&priv->pconst_heap, gl_info->limits.glsl_ps_float_constants))
6075     {
6076         ERR("Failed to initialize pixel shader constant heap\n");
6077         goto fail;
6078     }
6079
6080     if (wine_rb_init(&priv->program_lookup, &wined3d_glsl_program_rb_functions) == -1)
6081     {
6082         ERR("Failed to initialize rbtree.\n");
6083         goto fail;
6084     }
6085
6086     priv->next_constant_version = 1;
6087     priv->vertex_pipe = vertex_pipe;
6088     priv->fragment_pipe = fragment_pipe;
6089     fragment_pipe->get_caps(gl_info, &fragment_caps);
6090     priv->ffp_proj_control = fragment_caps.wined3d_caps & WINED3D_FRAGMENT_CAP_PROJ_CONTROL;
6091
6092     device->vertex_priv = vertex_priv;
6093     device->fragment_priv = fragment_priv;
6094     device->shader_priv = priv;
6095
6096     return WINED3D_OK;
6097
6098 fail:
6099     constant_heap_free(&priv->pconst_heap);
6100     constant_heap_free(&priv->vconst_heap);
6101     HeapFree(GetProcessHeap(), 0, priv->stack);
6102     shader_buffer_free(&priv->shader_buffer);
6103     fragment_pipe->free_private(device);
6104     vertex_pipe->vp_free(device);
6105     HeapFree(GetProcessHeap(), 0, priv);
6106     return E_OUTOFMEMORY;
6107 }
6108
6109 /* Context activation is done by the caller. */
6110 static void shader_glsl_free(struct wined3d_device *device)
6111 {
6112     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6113     struct shader_glsl_priv *priv = device->shader_priv;
6114     int i;
6115
6116     for (i = 0; i < tex_type_count; ++i)
6117     {
6118         if (priv->depth_blt_program_full[i])
6119         {
6120             GL_EXTCALL(glDeleteObjectARB(priv->depth_blt_program_full[i]));
6121         }
6122         if (priv->depth_blt_program_masked[i])
6123         {
6124             GL_EXTCALL(glDeleteObjectARB(priv->depth_blt_program_masked[i]));
6125         }
6126     }
6127
6128     wine_rb_destroy(&priv->program_lookup, NULL, NULL);
6129     constant_heap_free(&priv->pconst_heap);
6130     constant_heap_free(&priv->vconst_heap);
6131     HeapFree(GetProcessHeap(), 0, priv->stack);
6132     shader_buffer_free(&priv->shader_buffer);
6133     priv->fragment_pipe->free_private(device);
6134     priv->vertex_pipe->vp_free(device);
6135
6136     HeapFree(GetProcessHeap(), 0, device->shader_priv);
6137     device->shader_priv = NULL;
6138 }
6139
/* Backend hook invoked when a context is destroyed. The GLSL backend has
 * nothing to do here. */
static void shader_glsl_context_destroyed(void *shader_priv, const struct wined3d_context *context)
{
}
6141
6142 static void shader_glsl_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *caps)
6143 {
6144     UINT shader_model;
6145
6146     if (gl_info->supported[EXT_GPU_SHADER4] && gl_info->supported[ARB_SHADER_BIT_ENCODING]
6147             && gl_info->supported[ARB_GEOMETRY_SHADER4] && gl_info->glsl_version >= MAKEDWORD_VERSION(1, 50)
6148             && gl_info->supported[ARB_DRAW_ELEMENTS_BASE_VERTEX] && gl_info->supported[ARB_DRAW_INSTANCED])
6149         shader_model = 4;
6150     /* ARB_shader_texture_lod or EXT_gpu_shader4 is required for the SM3
6151      * texldd and texldl instructions. */
6152     else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD] || gl_info->supported[EXT_GPU_SHADER4])
6153         shader_model = 3;
6154     else
6155         shader_model = 2;
6156     TRACE("Shader model %u.\n", shader_model);
6157
6158     caps->vs_version = min(wined3d_settings.max_sm_vs, shader_model);
6159     caps->gs_version = min(wined3d_settings.max_sm_gs, shader_model);
6160     caps->ps_version = min(wined3d_settings.max_sm_ps, shader_model);
6161
6162     caps->vs_uniform_count = gl_info->limits.glsl_vs_float_constants;
6163     caps->ps_uniform_count = gl_info->limits.glsl_ps_float_constants;
6164
6165     /* FIXME: The following line is card dependent. -8.0 to 8.0 is the
6166      * Direct3D minimum requirement.
6167      *
6168      * Both GL_ARB_fragment_program and GLSL require a "maximum representable magnitude"
6169      * of colors to be 2^10, and 2^32 for other floats. Should we use 1024 here?
6170      *
6171      * The problem is that the refrast clamps temporary results in the shader to
6172      * [-MaxValue;+MaxValue]. If the card's max value is bigger than the one we advertize here,
6173      * then applications may miss the clamping behavior. On the other hand, if it is smaller,
6174      * the shader will generate incorrect results too. Unfortunately, GL deliberately doesn't
6175      * offer a way to query this.
6176      */
6177     caps->ps_1x_max_value = 8.0;
6178
6179     /* Ideally we'd only set caps like sRGB writes here if supported by both
6180      * the shader backend and the fragment pipe, but we can get called before
6181      * shader_glsl_alloc(). */
6182     caps->wined3d_caps = WINED3D_SHADER_CAP_VS_CLIPPING
6183             | WINED3D_SHADER_CAP_SRGB_WRITE;
6184 }
6185
6186 static BOOL shader_glsl_color_fixup_supported(struct color_fixup_desc fixup)
6187 {
6188     if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
6189     {
6190         TRACE("Checking support for fixup:\n");
6191         dump_color_fixup_desc(fixup);
6192     }
6193
6194     /* We support everything except YUV conversions. */
6195     if (!is_complex_fixup(fixup))
6196     {
6197         TRACE("[OK]\n");
6198         return TRUE;
6199     }
6200
6201     TRACE("[FAILED]\n");
6202     return FALSE;
6203 }
6204
6205 static const SHADER_HANDLER shader_glsl_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
6206 {
6207     /* WINED3DSIH_ABS                   */ shader_glsl_map2gl,
6208     /* WINED3DSIH_ADD                   */ shader_glsl_binop,
6209     /* WINED3DSIH_AND                   */ shader_glsl_binop,
6210     /* WINED3DSIH_BEM                   */ shader_glsl_bem,
6211     /* WINED3DSIH_BREAK                 */ shader_glsl_break,
6212     /* WINED3DSIH_BREAKC                */ shader_glsl_breakc,
6213     /* WINED3DSIH_BREAKP                */ shader_glsl_breakp,
6214     /* WINED3DSIH_CALL                  */ shader_glsl_call,
6215     /* WINED3DSIH_CALLNZ                */ shader_glsl_callnz,
6216     /* WINED3DSIH_CMP                   */ shader_glsl_conditional_move,
6217     /* WINED3DSIH_CND                   */ shader_glsl_cnd,
6218     /* WINED3DSIH_CRS                   */ shader_glsl_cross,
6219     /* WINED3DSIH_CUT                   */ shader_glsl_cut,
6220     /* WINED3DSIH_DCL                   */ shader_glsl_nop,
6221     /* WINED3DSIH_DCL_CONSTANT_BUFFER   */ shader_glsl_nop,
6222     /* WINED3DSIH_DCL_INPUT_PRIMITIVE   */ shader_glsl_nop,
6223     /* WINED3DSIH_DCL_OUTPUT_TOPOLOGY   */ shader_glsl_nop,
6224     /* WINED3DSIH_DCL_VERTICES_OUT      */ shader_glsl_nop,
6225     /* WINED3DSIH_DEF                   */ shader_glsl_nop,
6226     /* WINED3DSIH_DEFB                  */ shader_glsl_nop,
6227     /* WINED3DSIH_DEFI                  */ shader_glsl_nop,
6228     /* WINED3DSIH_DIV                   */ shader_glsl_binop,
6229     /* WINED3DSIH_DP2ADD                */ shader_glsl_dp2add,
6230     /* WINED3DSIH_DP3                   */ shader_glsl_dot,
6231     /* WINED3DSIH_DP4                   */ shader_glsl_dot,
6232     /* WINED3DSIH_DST                   */ shader_glsl_dst,
6233     /* WINED3DSIH_DSX                   */ shader_glsl_map2gl,
6234     /* WINED3DSIH_DSY                   */ shader_glsl_map2gl,
6235     /* WINED3DSIH_ELSE                  */ shader_glsl_else,
6236     /* WINED3DSIH_EMIT                  */ shader_glsl_emit,
6237     /* WINED3DSIH_ENDIF                 */ shader_glsl_end,
6238     /* WINED3DSIH_ENDLOOP               */ shader_glsl_end,
6239     /* WINED3DSIH_ENDREP                */ shader_glsl_end,
6240     /* WINED3DSIH_EQ                    */ shader_glsl_relop,
6241     /* WINED3DSIH_EXP                   */ shader_glsl_map2gl,
6242     /* WINED3DSIH_EXPP                  */ shader_glsl_expp,
6243     /* WINED3DSIH_FRC                   */ shader_glsl_map2gl,
6244     /* WINED3DSIH_FTOI                  */ shader_glsl_to_int,
6245     /* WINED3DSIH_GE                    */ shader_glsl_relop,
6246     /* WINED3DSIH_IADD                  */ shader_glsl_binop,
6247     /* WINED3DSIH_IEQ                   */ NULL,
6248     /* WINED3DSIH_IF                    */ shader_glsl_if,
6249     /* WINED3DSIH_IFC                   */ shader_glsl_ifc,
6250     /* WINED3DSIH_IGE                   */ shader_glsl_relop,
6251     /* WINED3DSIH_IMUL                  */ shader_glsl_imul,
6252     /* WINED3DSIH_ITOF                  */ shader_glsl_to_float,
6253     /* WINED3DSIH_LABEL                 */ shader_glsl_label,
6254     /* WINED3DSIH_LD                    */ NULL,
6255     /* WINED3DSIH_LIT                   */ shader_glsl_lit,
6256     /* WINED3DSIH_LOG                   */ shader_glsl_log,
6257     /* WINED3DSIH_LOGP                  */ shader_glsl_log,
6258     /* WINED3DSIH_LOOP                  */ shader_glsl_loop,
6259     /* WINED3DSIH_LRP                   */ shader_glsl_lrp,
6260     /* WINED3DSIH_LT                    */ shader_glsl_relop,
6261     /* WINED3DSIH_M3x2                  */ shader_glsl_mnxn,
6262     /* WINED3DSIH_M3x3                  */ shader_glsl_mnxn,
6263     /* WINED3DSIH_M3x4                  */ shader_glsl_mnxn,
6264     /* WINED3DSIH_M4x3                  */ shader_glsl_mnxn,
6265     /* WINED3DSIH_M4x4                  */ shader_glsl_mnxn,
6266     /* WINED3DSIH_MAD                   */ shader_glsl_mad,
6267     /* WINED3DSIH_MAX                   */ shader_glsl_map2gl,
6268     /* WINED3DSIH_MIN                   */ shader_glsl_map2gl,
6269     /* WINED3DSIH_MOV                   */ shader_glsl_mov,
6270     /* WINED3DSIH_MOVA                  */ shader_glsl_mov,
6271     /* WINED3DSIH_MOVC                  */ shader_glsl_conditional_move,
6272     /* WINED3DSIH_MUL                   */ shader_glsl_binop,
6273     /* WINED3DSIH_NOP                   */ shader_glsl_nop,
6274     /* WINED3DSIH_NRM                   */ shader_glsl_nrm,
6275     /* WINED3DSIH_PHASE                 */ shader_glsl_nop,
6276     /* WINED3DSIH_POW                   */ shader_glsl_pow,
6277     /* WINED3DSIH_RCP                   */ shader_glsl_rcp,
6278     /* WINED3DSIH_REP                   */ shader_glsl_rep,
6279     /* WINED3DSIH_RET                   */ shader_glsl_ret,
6280     /* WINED3DSIH_ROUND_NI              */ shader_glsl_map2gl,
6281     /* WINED3DSIH_RSQ                   */ shader_glsl_rsq,
6282     /* WINED3DSIH_SAMPLE                */ NULL,
6283     /* WINED3DSIH_SAMPLE_GRAD           */ NULL,
6284     /* WINED3DSIH_SAMPLE_LOD            */ NULL,
6285     /* WINED3DSIH_SETP                  */ NULL,
6286     /* WINED3DSIH_SGE                   */ shader_glsl_compare,
6287     /* WINED3DSIH_SGN                   */ shader_glsl_sgn,
6288     /* WINED3DSIH_SINCOS                */ shader_glsl_sincos,
6289     /* WINED3DSIH_SLT                   */ shader_glsl_compare,
6290     /* WINED3DSIH_SQRT                  */ NULL,
6291     /* WINED3DSIH_SUB                   */ shader_glsl_binop,
6292     /* WINED3DSIH_TEX                   */ shader_glsl_tex,
6293     /* WINED3DSIH_TEXBEM                */ shader_glsl_texbem,
6294     /* WINED3DSIH_TEXBEML               */ shader_glsl_texbem,
6295     /* WINED3DSIH_TEXCOORD              */ shader_glsl_texcoord,
6296     /* WINED3DSIH_TEXDEPTH              */ shader_glsl_texdepth,
6297     /* WINED3DSIH_TEXDP3                */ shader_glsl_texdp3,
6298     /* WINED3DSIH_TEXDP3TEX             */ shader_glsl_texdp3tex,
6299     /* WINED3DSIH_TEXKILL               */ shader_glsl_texkill,
6300     /* WINED3DSIH_TEXLDD                */ shader_glsl_texldd,
6301     /* WINED3DSIH_TEXLDL                */ shader_glsl_texldl,
6302     /* WINED3DSIH_TEXM3x2DEPTH          */ shader_glsl_texm3x2depth,
6303     /* WINED3DSIH_TEXM3x2PAD            */ shader_glsl_texm3x2pad,
6304     /* WINED3DSIH_TEXM3x2TEX            */ shader_glsl_texm3x2tex,
6305     /* WINED3DSIH_TEXM3x3               */ shader_glsl_texm3x3,
6306     /* WINED3DSIH_TEXM3x3DIFF           */ NULL,
6307     /* WINED3DSIH_TEXM3x3PAD            */ shader_glsl_texm3x3pad,
6308     /* WINED3DSIH_TEXM3x3SPEC           */ shader_glsl_texm3x3spec,
6309     /* WINED3DSIH_TEXM3x3TEX            */ shader_glsl_texm3x3tex,
6310     /* WINED3DSIH_TEXM3x3VSPEC          */ shader_glsl_texm3x3vspec,
6311     /* WINED3DSIH_TEXREG2AR             */ shader_glsl_texreg2ar,
6312     /* WINED3DSIH_TEXREG2GB             */ shader_glsl_texreg2gb,
6313     /* WINED3DSIH_TEXREG2RGB            */ shader_glsl_texreg2rgb,
6314     /* WINED3DSIH_UDIV                  */ shader_glsl_udiv,
6315     /* WINED3DSIH_USHR                  */ shader_glsl_binop,
6316     /* WINED3DSIH_UTOF                  */ shader_glsl_to_float,
6317     /* WINED3DSIH_XOR                   */ shader_glsl_binop,
6318 };
6319
6320 static void shader_glsl_handle_instruction(const struct wined3d_shader_instruction *ins) {
6321     SHADER_HANDLER hw_fct;
6322
6323     /* Select handler */
6324     hw_fct = shader_glsl_instruction_handler_table[ins->handler_idx];
6325
6326     /* Unhandled opcode */
6327     if (!hw_fct)
6328     {
6329         FIXME("Backend can't handle opcode %#x\n", ins->handler_idx);
6330         return;
6331     }
6332     hw_fct(ins);
6333
6334     shader_glsl_add_instruction_modifiers(ins);
6335 }
6336
6337 static BOOL shader_glsl_has_ffp_proj_control(void *shader_priv)
6338 {
6339     struct shader_glsl_priv *priv = shader_priv;
6340
6341     return priv->ffp_proj_control;
6342 }
6343
6344 const struct wined3d_shader_backend_ops glsl_shader_backend =
6345 {
6346     shader_glsl_handle_instruction,
6347     shader_glsl_select,
6348     shader_glsl_select_depth_blt,
6349     shader_glsl_deselect_depth_blt,
6350     shader_glsl_update_float_vertex_constants,
6351     shader_glsl_update_float_pixel_constants,
6352     shader_glsl_load_constants,
6353     shader_glsl_load_np2fixup_constants,
6354     shader_glsl_destroy,
6355     shader_glsl_alloc,
6356     shader_glsl_free,
6357     shader_glsl_context_destroyed,
6358     shader_glsl_get_caps,
6359     shader_glsl_color_fixup_supported,
6360     shader_glsl_has_ffp_proj_control,
6361 };
6362
6363 static void glsl_fragment_pipe_enable(const struct wined3d_gl_info *gl_info, BOOL enable)
6364 {
6365     /* Nothing to do. */
6366 }
6367
6368 static void glsl_fragment_pipe_get_caps(const struct wined3d_gl_info *gl_info, struct fragment_caps *caps)
6369 {
6370     caps->wined3d_caps = WINED3D_FRAGMENT_CAP_PROJ_CONTROL
6371             | WINED3D_FRAGMENT_CAP_SRGB_WRITE;
6372     caps->PrimitiveMiscCaps = WINED3DPMISCCAPS_TSSARGTEMP;
6373     caps->TextureOpCaps = WINED3DTEXOPCAPS_DISABLE
6374             | WINED3DTEXOPCAPS_SELECTARG1
6375             | WINED3DTEXOPCAPS_SELECTARG2
6376             | WINED3DTEXOPCAPS_MODULATE4X
6377             | WINED3DTEXOPCAPS_MODULATE2X
6378             | WINED3DTEXOPCAPS_MODULATE
6379             | WINED3DTEXOPCAPS_ADDSIGNED2X
6380             | WINED3DTEXOPCAPS_ADDSIGNED
6381             | WINED3DTEXOPCAPS_ADD
6382             | WINED3DTEXOPCAPS_SUBTRACT
6383             | WINED3DTEXOPCAPS_ADDSMOOTH
6384             | WINED3DTEXOPCAPS_BLENDCURRENTALPHA
6385             | WINED3DTEXOPCAPS_BLENDFACTORALPHA
6386             | WINED3DTEXOPCAPS_BLENDTEXTUREALPHA
6387             | WINED3DTEXOPCAPS_BLENDDIFFUSEALPHA
6388             | WINED3DTEXOPCAPS_BLENDTEXTUREALPHAPM
6389             | WINED3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR
6390             | WINED3DTEXOPCAPS_MODULATECOLOR_ADDALPHA
6391             | WINED3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA
6392             | WINED3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR
6393             | WINED3DTEXOPCAPS_DOTPRODUCT3
6394             | WINED3DTEXOPCAPS_MULTIPLYADD
6395             | WINED3DTEXOPCAPS_LERP
6396             | WINED3DTEXOPCAPS_BUMPENVMAP
6397             | WINED3DTEXOPCAPS_BUMPENVMAPLUMINANCE;
6398     caps->MaxTextureBlendStages = 8;
6399     caps->MaxSimultaneousTextures = min(gl_info->limits.fragment_samplers, 8);
6400 }
6401
6402 static void *glsl_fragment_pipe_alloc(const struct wined3d_shader_backend_ops *shader_backend, void *shader_priv)
6403 {
6404     struct shader_glsl_priv *priv;
6405
6406     if (shader_backend == &glsl_shader_backend)
6407     {
6408         priv = shader_priv;
6409
6410         if (wine_rb_init(&priv->ffp_fragment_shaders, &wined3d_ffp_frag_program_rb_functions) == -1)
6411         {
6412             ERR("Failed to initialize rbtree.\n");
6413             return NULL;
6414         }
6415
6416         return priv;
6417     }
6418
6419     FIXME("GLSL fragment pipe without GLSL shader backend not implemented.\n");
6420
6421     return NULL;
6422 }
6423
/* Context handed to shader_glsl_free_ffp_fragment_shader() while the tree of
 * fixed function fragment shaders is being destroyed. */
struct glsl_ffp_destroy_ctx
{
    /* Backend private data, passed on to delete_glsl_program_entry(). */
    struct shader_glsl_priv *priv;
    /* GL function table used to delete the GL objects. */
    const struct wined3d_gl_info *gl_info;
};
6429
6430 static void shader_glsl_free_ffp_fragment_shader(struct wine_rb_entry *entry, void *context)
6431 {
6432     struct glsl_ffp_fragment_shader *shader = WINE_RB_ENTRY_VALUE(entry,
6433             struct glsl_ffp_fragment_shader, entry.entry);
6434     struct glsl_shader_prog_link *program, *program2;
6435     struct glsl_ffp_destroy_ctx *ctx = context;
6436
6437     LIST_FOR_EACH_ENTRY_SAFE(program, program2, &shader->linked_programs,
6438             struct glsl_shader_prog_link, ps.shader_entry)
6439     {
6440         delete_glsl_program_entry(ctx->priv, ctx->gl_info, program);
6441     }
6442     ctx->gl_info->gl_ops.ext.p_glDeleteObjectARB(shader->id);
6443     HeapFree(GetProcessHeap(), 0, shader);
6444 }
6445
6446 /* Context activation is done by the caller. */
6447 static void glsl_fragment_pipe_free(struct wined3d_device *device)
6448 {
6449     struct shader_glsl_priv *priv = device->fragment_priv;
6450     struct glsl_ffp_destroy_ctx ctx;
6451
6452     ctx.priv = priv;
6453     ctx.gl_info = &device->adapter->gl_info;
6454     wine_rb_destroy(&priv->ffp_fragment_shaders, shader_glsl_free_ffp_fragment_shader, &ctx);
6455 }
6456
6457 static void glsl_fragment_pipe_shader(struct wined3d_context *context,
6458         const struct wined3d_state *state, DWORD state_id)
6459 {
6460     context->last_was_pshader = use_ps(state);
6461
6462     context->select_shader = 1;
6463     context->load_constants = 1;
6464 }
6465
6466 static void glsl_fragment_pipe_fog(struct wined3d_context *context,
6467         const struct wined3d_state *state, DWORD state_id)
6468 {
6469     BOOL use_vshader = use_vs(state);
6470     enum fogsource new_source;
6471
6472     context->select_shader = 1;
6473     context->load_constants = 1;
6474
6475     if (!state->render_states[WINED3D_RS_FOGENABLE])
6476         return;
6477
6478     if (state->render_states[WINED3D_RS_FOGTABLEMODE] == WINED3D_FOG_NONE)
6479     {
6480         if (use_vshader)
6481             new_source = FOGSOURCE_VS;
6482         else if (state->render_states[WINED3D_RS_FOGVERTEXMODE] == WINED3D_FOG_NONE || context->last_was_rhw)
6483             new_source = FOGSOURCE_COORD;
6484         else
6485             new_source = FOGSOURCE_FFP;
6486     }
6487     else
6488     {
6489         new_source = FOGSOURCE_FFP;
6490     }
6491
6492     if (new_source != context->fog_source)
6493     {
6494         context->fog_source = new_source;
6495         state_fogstartend(context, state, STATE_RENDER(WINED3D_RS_FOGSTART));
6496     }
6497 }
6498
6499 static void glsl_fragment_pipe_tex_transform(struct wined3d_context *context,
6500         const struct wined3d_state *state, DWORD state_id)
6501 {
6502     context->select_shader = 1;
6503     context->load_constants = 1;
6504 }
6505
6506 static void glsl_fragment_pipe_invalidate_constants(struct wined3d_context *context,
6507         const struct wined3d_state *state, DWORD state_id)
6508 {
6509     context->load_constants = 1;
6510 }
6511
6512 static const struct StateEntryTemplate glsl_fragment_pipe_state_template[] =
6513 {
6514     {STATE_RENDER(WINED3D_RS_TEXTUREFACTOR),                    {STATE_RENDER(WINED3D_RS_TEXTUREFACTOR),                     glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6515     {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6516     {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6517     {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6518     {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6519     {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6520     {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6521     {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6522     {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6523     {STATE_TEXTURESTAGE(0, WINED3D_TSS_RESULT_ARG),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6524     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),          {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),           glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6525     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT01),          {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6526     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT10),          {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6527     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT11),          {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6528     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE),         {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE),          glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6529     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LOFFSET),        {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE),          NULL                                   }, WINED3D_GL_EXT_NONE },
6530     {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6531     {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6532     {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6533     {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6534     {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6535     {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6536     {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6537     {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6538     {STATE_TEXTURESTAGE(1, WINED3D_TSS_RESULT_ARG),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6539     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),          {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),           glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6540     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT01),          {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6541     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT10),          {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6542     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT11),          {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6543     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE),         {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE),          glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6544     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LOFFSET),        {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE),          NULL                                   }, WINED3D_GL_EXT_NONE },
6545     {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6546     {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6547     {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6548     {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6549     {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6550     {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6551     {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6552     {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6553     {STATE_TEXTURESTAGE(2, WINED3D_TSS_RESULT_ARG),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6554     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),          {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),           glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6555     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT01),          {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6556     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT10),          {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6557     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT11),          {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6558     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE),         {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE),          glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6559     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LOFFSET),        {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE),          NULL                                   }, WINED3D_GL_EXT_NONE },
6560     {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6561     {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6562     {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6563     {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6564     {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6565     {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6566     {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6567     {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6568     {STATE_TEXTURESTAGE(3, WINED3D_TSS_RESULT_ARG),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6569     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),          {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),           glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6570     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT01),          {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6571     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT10),          {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6572     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT11),          {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6573     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE),         {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE),          glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6574     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LOFFSET),        {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE),          NULL                                   }, WINED3D_GL_EXT_NONE },
6575     {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6576     {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6577     {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6578     {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6579     {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6580     {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6581     {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6582     {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6583     {STATE_TEXTURESTAGE(4, WINED3D_TSS_RESULT_ARG),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6584     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),          {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),           glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6585     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT01),          {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6586     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT10),          {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6587     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT11),          {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6588     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE),         {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE),          glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6589     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LOFFSET),        {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE),          NULL                                   }, WINED3D_GL_EXT_NONE },
6590     {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6591     {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6592     {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6593     {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6594     {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6595     {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6596     {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6597     {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6598     {STATE_TEXTURESTAGE(5, WINED3D_TSS_RESULT_ARG),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6599     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),          {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),           glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6600     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT01),          {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6601     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT10),          {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6602     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT11),          {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6603     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE),         {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE),          glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6604     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LOFFSET),        {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE),          NULL                                   }, WINED3D_GL_EXT_NONE },
6605     {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6606     {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6607     {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6608     {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6609     {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6610     {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6611     {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6612     {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6613     {STATE_TEXTURESTAGE(6, WINED3D_TSS_RESULT_ARG),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6614     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),          {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),           glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6615     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT01),          {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6616     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT10),          {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6617     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT11),          {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6618     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE),         {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE),          glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6619     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LOFFSET),        {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE),          NULL                                   }, WINED3D_GL_EXT_NONE },
6620     {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6621     {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6622     {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6623     {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6624     {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_OP),               {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6625     {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG1),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6626     {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG2),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6627     {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG0),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6628     {STATE_TEXTURESTAGE(7, WINED3D_TSS_RESULT_ARG),             {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6629     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),          {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),           glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6630     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT01),          {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6631     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT10),          {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6632     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT11),          {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),           NULL                                   }, WINED3D_GL_EXT_NONE },
6633     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE),         {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE),          glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6634     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LOFFSET),        {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE),          NULL                                   }, WINED3D_GL_EXT_NONE },
6635     {STATE_PIXELSHADER,                                         {STATE_PIXELSHADER,                                          glsl_fragment_pipe_shader              }, WINED3D_GL_EXT_NONE },
6636     {STATE_RENDER(WINED3D_RS_FOGENABLE),                        {STATE_RENDER(WINED3D_RS_FOGENABLE),                         glsl_fragment_pipe_fog                 }, WINED3D_GL_EXT_NONE },
6637     {STATE_RENDER(WINED3D_RS_FOGTABLEMODE),                     {STATE_RENDER(WINED3D_RS_FOGENABLE),                         NULL                                   }, WINED3D_GL_EXT_NONE },
6638     {STATE_RENDER(WINED3D_RS_FOGVERTEXMODE),                    {STATE_RENDER(WINED3D_RS_FOGENABLE),                         NULL                                   }, WINED3D_GL_EXT_NONE },
6639     {STATE_RENDER(WINED3D_RS_FOGSTART),                         {STATE_RENDER(WINED3D_RS_FOGSTART),                          state_fogstartend                      }, WINED3D_GL_EXT_NONE },
6640     {STATE_RENDER(WINED3D_RS_FOGEND),                           {STATE_RENDER(WINED3D_RS_FOGSTART),                          NULL                                   }, WINED3D_GL_EXT_NONE },
6641     {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE),                  {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE),                   state_srgbwrite                        }, ARB_FRAMEBUFFER_SRGB},
6642     {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE),                  {STATE_PIXELSHADER,                                          NULL                                   }, WINED3D_GL_EXT_NONE },
6643     {STATE_RENDER(WINED3D_RS_FOGCOLOR),                         {STATE_RENDER(WINED3D_RS_FOGCOLOR),                          state_fogcolor                         }, WINED3D_GL_EXT_NONE },
6644     {STATE_RENDER(WINED3D_RS_FOGDENSITY),                       {STATE_RENDER(WINED3D_RS_FOGDENSITY),                        state_fogdensity                       }, WINED3D_GL_EXT_NONE },
6645     {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(0, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), glsl_fragment_pipe_tex_transform       }, WINED3D_GL_EXT_NONE },
6646     {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(1, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), glsl_fragment_pipe_tex_transform       }, WINED3D_GL_EXT_NONE },
6647     {STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(2, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), glsl_fragment_pipe_tex_transform       }, WINED3D_GL_EXT_NONE },
6648     {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(3, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), glsl_fragment_pipe_tex_transform       }, WINED3D_GL_EXT_NONE },
6649     {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(4, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), glsl_fragment_pipe_tex_transform       }, WINED3D_GL_EXT_NONE },
6650     {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(5, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), glsl_fragment_pipe_tex_transform       }, WINED3D_GL_EXT_NONE },
6651     {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(6, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), glsl_fragment_pipe_tex_transform       }, WINED3D_GL_EXT_NONE },
6652     {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(7, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), glsl_fragment_pipe_tex_transform       }, WINED3D_GL_EXT_NONE },
6653     {STATE_RENDER(WINED3D_RS_SPECULARENABLE),                   {STATE_RENDER(WINED3D_RS_SPECULARENABLE),                    glsl_fragment_pipe_invalidate_constants}, WINED3D_GL_EXT_NONE },
6654     {0 /* Terminate */,                                         {0,                                                          0                                      }, WINED3D_GL_EXT_NONE },
6655 };
6656
/*
 * Fragment pipeline descriptor for the GLSL backend.
 *
 * The members are initialised positionally, so the order of the entries
 * below must match the member order of struct fragment_pipeline. That
 * structure is not declared in this part of the file (presumably it comes
 * from wined3d_private.h), so keep both in sync when either changes.
 *
 * NOTE(review): the per-entry notes are inferred from the identifier names
 * only; confirm them against the struct declaration.
 */
6657 const struct fragment_pipeline glsl_fragment_pipe =
6658 {
6659     glsl_fragment_pipe_enable,              /* presumably the enable/disable hook */
6660     glsl_fragment_pipe_get_caps,            /* presumably the capability query */
6661     glsl_fragment_pipe_alloc,               /* presumably allocates pipeline private data */
6662     glsl_fragment_pipe_free,                /* presumably releases what the alloc hook set up */
6663     shader_glsl_color_fixup_supported,      /* shader_glsl_* helper, not a glsl_fragment_pipe_* one */
6664     glsl_fragment_pipe_state_template,      /* state table; presumably the one ending just above */
6665 };