wined3d: Emulate if(bool) in ARB shaders.
[wine] / dlls / wined3d / arb_program_shader.c
1 /*
2  * Pixel and vertex shaders implementation using ARB_vertex_program
3  * and ARB_fragment_program GL extensions.
4  *
5  * Copyright 2002-2003 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006 Ivan Gyurdiev
10  * Copyright 2006 Jason Green
11  * Copyright 2006 Henri Verbeet
12  * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
13  * Copyright 2009 Henri Verbeet for CodeWeavers
14  *
15  * This library is free software; you can redistribute it and/or
16  * modify it under the terms of the GNU Lesser General Public
17  * License as published by the Free Software Foundation; either
18  * version 2.1 of the License, or (at your option) any later version.
19  *
20  * This library is distributed in the hope that it will be useful,
21  * but WITHOUT ANY WARRANTY; without even the implied warranty of
22  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23  * Lesser General Public License for more details.
24  *
25  * You should have received a copy of the GNU Lesser General Public
26  * License along with this library; if not, write to the Free Software
27  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
28  */
29
30 #include "config.h"
31
32 #include <math.h>
33 #include <stdio.h>
34
35 #include "wined3d_private.h"
36
/* Debug channels referenced in this file: d3d_shader is the default one,
 * d3d_constants is selected explicitly through TRACE_(d3d_constants). */
WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
WINE_DECLARE_DEBUG_CHANNEL(d3d_caps);
WINE_DECLARE_DEBUG_CHANNEL(d3d);

/* Expands to the gl_info pointer that is in scope at the point of use.
 * NOTE(review): presumably consumed by the GL_SUPPORT / GL_LIMITS / GL_EXTCALL
 * macros, which is why functions using them keep a local or parameter named
 * gl_info - confirm against wined3d_private.h. */
#define GLINFO_LOCATION      (*gl_info)
43
44 /* GL locking for state handlers is done by the caller. */
45 static BOOL need_mova_const(IWineD3DBaseShader *shader, const WineD3D_GL_Info *gl_info) {
46     IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *) shader;
47     if(!This->baseShader.reg_maps.usesmova) return FALSE;
48     return !GL_SUPPORT(NV_VERTEX_PROGRAM2_OPTION);
49 }
50
51 static BOOL need_helper_const(const WineD3D_GL_Info *gl_info) {
52     if(!GL_SUPPORT(NV_VERTEX_PROGRAM)   || /* Need to init colors */
53        gl_info->arb_vs_offset_limit     || /* Have to init texcoords */
54        gl_info->set_texcoord_w) {          /* Load the immval offset */
55         return TRUE;
56     }
57     return FALSE;
58 }
59
60 static unsigned int reserved_vs_const(IWineD3DBaseShader *shader, const WineD3D_GL_Info *gl_info) {
61     unsigned int ret = 1;
62     /* We use one PARAM for the pos fixup, and in some cases one to load
63      * some immediate values into the shader
64      */
65     if(need_helper_const(gl_info)) ret++;
66     if(need_mova_const(shader, gl_info)) ret++;
67     return ret;
68 }
69
/* Internally used shader constants. They are uploaded to the last program.env
 * slot, GL_LIMITS(vshader_constantsF) - 1; the declarations generated for
 * vertex shaders expose at most GL_LIMITS(vshader_constantsF) - 1 application
 * constants, so that slot is not used by the application.
 *
 * Both expansions are fully parenthesized so that the macros can be used
 * inside larger expressions without operator precedence surprises.
 */
#define ARB_SHADER_PRIVCONST_BASE (GL_LIMITS(vshader_constantsF) - 1)
#define ARB_SHADER_PRIVCONST_POS (ARB_SHADER_PRIVCONST_BASE + 0)
75
76 /* ARB_program_shader private data */
77 struct shader_arb_priv {
78     GLuint                  current_vprogram_id;
79     GLuint                  current_fprogram_id;
80     GLuint                  depth_blt_vprogram_id;
81     GLuint                  depth_blt_fprogram_id[tex_type_count];
82     BOOL                    use_arbfp_fixed_func;
83     struct hash_table_t     *fragment_shaders;
84 };
85
86 struct if_frame {
87     struct list entry;
88     BOOL ifc;
89     BOOL muting;
90 };
91
92 struct shader_arb_ctx_priv {
93     char addr_reg[20];
94     enum {
95         /* plain GL_ARB_vertex_program or GL_ARB_fragment_program */
96         ARB,
97         /* GL_NV_vertex_progam2_option or GL_NV_fragment_program_option */
98         NV2,
99         /* GL_NV_vertex_program3 or GL_NV_fragment_program2 */
100         NV3
101     } target_version;
102
103     const struct arb_vs_compile_args    *cur_vs_args;
104     const struct arb_ps_compile_args    *cur_ps_args;
105     struct list if_frames;
106     BOOL muted;
107 };
108
109 struct arb_ps_compile_args {
110     struct ps_compile_args          super;
111     DWORD                           bools; /* WORD is enough, use DWORD for alignment */
112 };
113
114 struct arb_ps_compiled_shader {
115     struct arb_ps_compile_args      args;
116     GLuint                          prgId;
117 };
118
119 struct arb_pshader_private {
120     struct arb_ps_compiled_shader   *gl_shaders;
121     UINT                            num_gl_shaders, shader_array_size;
122 };
123
124 struct arb_vs_compile_args {
125     struct vs_compile_args          super;
126     DWORD                           bools; /* WORD is enough, use DWORD for alignment */
127 };
128
129 struct arb_vs_compiled_shader {
130     struct arb_vs_compile_args      args;
131     GLuint                          prgId;
132 };
133
134 struct arb_vshader_private {
135     struct arb_vs_compiled_shader   *gl_shaders;
136     UINT                            num_gl_shaders, shader_array_size;
137 };
138
139 /********************************************************
140  * ARB_[vertex/fragment]_program helper functions follow
141  ********************************************************/
142
143 /** 
144  * Loads floating point constants into the currently set ARB_vertex/fragment_program.
145  * When constant_list == NULL, it will load all the constants.
146  *  
147  * @target_type should be either GL_VERTEX_PROGRAM_ARB (for vertex shaders)
148  *  or GL_FRAGMENT_PROGRAM_ARB (for pixel shaders)
149  */
150 /* GL locking is done by the caller */
151 static unsigned int shader_arb_load_constantsF(IWineD3DBaseShaderImpl* This, const WineD3D_GL_Info *gl_info,
152         GLuint target_type, unsigned int max_constants, const float *constants, char *dirty_consts)
153 {
154     local_constant* lconst;
155     DWORD i, j;
156     unsigned int ret;
157
158     if (TRACE_ON(d3d_shader)) {
159         for(i = 0; i < max_constants; i++) {
160             if(!dirty_consts[i]) continue;
161             TRACE_(d3d_constants)("Loading constants %i: %f, %f, %f, %f\n", i,
162                         constants[i * 4 + 0], constants[i * 4 + 1],
163                         constants[i * 4 + 2], constants[i * 4 + 3]);
164         }
165     }
166     /* In 1.X pixel shaders constants are implicitly clamped in the range [-1;1] */
167     if (target_type == GL_FRAGMENT_PROGRAM_ARB && This->baseShader.reg_maps.shader_version.major == 1)
168     {
169         float lcl_const[4];
170         for(i = 0; i < max_constants; i++) {
171             if(!dirty_consts[i]) continue;
172             dirty_consts[i] = 0;
173
174             j = 4 * i;
175             if(constants[j + 0] > 1.0) lcl_const[0] = 1.0;
176             else if(constants[j + 0] < -1.0) lcl_const[0] = -1.0;
177             else lcl_const[0] = constants[j + 0];
178
179             if(constants[j + 1] > 1.0) lcl_const[1] = 1.0;
180             else if(constants[j + 1] < -1.0) lcl_const[1] = -1.0;
181             else lcl_const[1] = constants[j + 1];
182
183             if(constants[j + 2] > 1.0) lcl_const[2] = 1.0;
184             else if(constants[j + 2] < -1.0) lcl_const[2] = -1.0;
185             else lcl_const[2] = constants[j + 2];
186
187             if(constants[j + 3] > 1.0) lcl_const[3] = 1.0;
188             else if(constants[j + 3] < -1.0) lcl_const[3] = -1.0;
189             else lcl_const[3] = constants[j + 3];
190
191             GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, lcl_const));
192         }
193     } else {
194         if(GL_SUPPORT(EXT_GPU_PROGRAM_PARAMETERS)) {
195             /* TODO: Benchmark if we're better of with finding the dirty constants ourselves,
196              * or just reloading *all* constants at once
197              *
198             GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, 0, max_constants, constants));
199              */
200             for(i = 0; i < max_constants; i++) {
201                 if(!dirty_consts[i]) continue;
202
203                 /* Find the next block of dirty constants */
204                 dirty_consts[i] = 0;
205                 j = i;
206                 for(i++; (i < max_constants) && dirty_consts[i]; i++) {
207                     dirty_consts[i] = 0;
208                 }
209
210                 GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, j, i - j, constants + (j * 4)));
211             }
212         } else {
213             for(i = 0; i < max_constants; i++) {
214                 if(dirty_consts[i]) {
215                     dirty_consts[i] = 0;
216                     GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, constants + (i * 4)));
217                 }
218             }
219         }
220     }
221     checkGLcall("glProgramEnvParameter4fvARB()");
222
223     /* Load immediate constants */
224     if(This->baseShader.load_local_constsF) {
225         if (TRACE_ON(d3d_shader)) {
226             LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
227                 GLfloat* values = (GLfloat*)lconst->value;
228                 TRACE_(d3d_constants)("Loading local constants %i: %f, %f, %f, %f\n", lconst->idx,
229                         values[0], values[1], values[2], values[3]);
230             }
231         }
232         /* Immediate constants are clamped for 1.X shaders at loading times */
233         ret = 0;
234         LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
235             dirty_consts[lconst->idx] = 1; /* Dirtify so the non-immediate constant overwrites it next time */
236             ret = max(ret, lconst->idx + 1);
237             GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, lconst->idx, (GLfloat*)lconst->value));
238         }
239         checkGLcall("glProgramEnvParameter4fvARB()");
240         return ret; /* The loaded immediate constants need reloading for the next shader */
241     } else {
242         return 0; /* No constants are dirty now */
243     }
244 }
245
246 /**
247  * Loads the texture dimensions for NP2 fixup into the currently set ARB_[vertex/fragment]_programs.
248  */
249 static void shader_arb_load_np2fixup_constants(
250     IWineD3DDevice* device,
251     char usePixelShader,
252     char useVertexShader) {
253     /* not implemented */
254 }
255
256 static inline void shader_arb_ps_local_constants(IWineD3DDeviceImpl* deviceImpl)
257 {
258     IWineD3DStateBlockImpl* stateBlock = deviceImpl->stateBlock;
259     IWineD3DBaseShaderImpl* pshader = (IWineD3DBaseShaderImpl*) stateBlock->pixelShader;
260     IWineD3DPixelShaderImpl *psi = (IWineD3DPixelShaderImpl *) pshader;
261     const WineD3D_GL_Info *gl_info = &deviceImpl->adapter->gl_info;
262     unsigned char i;
263
264     for(i = 0; i < psi->numbumpenvmatconsts; i++)
265     {
266         /* The state manager takes care that this function is always called if the bump env matrix changes */
267         const float *data = (const float *)&stateBlock->textureState[(int) psi->bumpenvmatconst[i].texunit][WINED3DTSS_BUMPENVMAT00];
268         GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, psi->bumpenvmatconst[i].const_num, data));
269
270         if (psi->luminanceconst[i].const_num != WINED3D_CONST_NUM_UNUSED)
271         {
272             /* WINED3DTSS_BUMPENVLSCALE and WINED3DTSS_BUMPENVLOFFSET are next to each other.
273              * point gl to the scale, and load 4 floats. x = scale, y = offset, z and w are junk, we
274              * don't care about them. The pointers are valid for sure because the stateblock is bigger.
275              * (they're WINED3DTSS_TEXTURETRANSFORMFLAGS and WINED3DTSS_ADDRESSW, so most likely 0 or NaN
276             */
277             const float *scale = (const float *)&stateBlock->textureState[(int) psi->luminanceconst[i].texunit][WINED3DTSS_BUMPENVLSCALE];
278             GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, psi->luminanceconst[i].const_num, scale));
279         }
280     }
281 }
282 /**
283  * Loads the app-supplied constants into the currently set ARB_[vertex/fragment]_programs.
284  * 
285  * We only support float constants in ARB at the moment, so don't 
286  * worry about the Integers or Booleans
287  */
288 /* GL locking is done by the caller (state handler) */
289 static void shader_arb_load_constants(
290     IWineD3DDevice* device,
291     char usePixelShader,
292     char useVertexShader) {
293    
294     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) device; 
295     IWineD3DStateBlockImpl* stateBlock = deviceImpl->stateBlock;
296     const WineD3D_GL_Info *gl_info = &deviceImpl->adapter->gl_info;
297
298     if (useVertexShader) {
299         IWineD3DBaseShaderImpl* vshader = (IWineD3DBaseShaderImpl*) stateBlock->vertexShader;
300
301         /* Load DirectX 9 float constants for vertex shader */
302         deviceImpl->highest_dirty_vs_const = shader_arb_load_constantsF(
303                 vshader, gl_info, GL_VERTEX_PROGRAM_ARB,
304                 deviceImpl->highest_dirty_vs_const,
305                 stateBlock->vertexShaderConstantF,
306                 deviceImpl->activeContext->vshader_const_dirty);
307
308         /* Upload the position fixup */
309         GL_EXTCALL(glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, ARB_SHADER_PRIVCONST_POS, deviceImpl->posFixup));
310     }
311
312     if (usePixelShader) {
313         IWineD3DBaseShaderImpl* pshader = (IWineD3DBaseShaderImpl*) stateBlock->pixelShader;
314
315         /* Load DirectX 9 float constants for pixel shader */
316         deviceImpl->highest_dirty_ps_const = shader_arb_load_constantsF(
317                 pshader, gl_info, GL_FRAGMENT_PROGRAM_ARB,
318                 deviceImpl->highest_dirty_ps_const,
319                 stateBlock->pixelShaderConstantF,
320                 deviceImpl->activeContext->pshader_const_dirty);
321         shader_arb_ps_local_constants(deviceImpl);
322     }
323 }
324
325 static void shader_arb_update_float_vertex_constants(IWineD3DDevice *iface, UINT start, UINT count)
326 {
327     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
328
329     /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active
330      * context. On a context switch the old context will be fully dirtified */
331     memset(This->activeContext->vshader_const_dirty + start, 1,
332             sizeof(*This->activeContext->vshader_const_dirty) * count);
333     This->highest_dirty_vs_const = max(This->highest_dirty_vs_const, start + count + 1);
334 }
335
336 static void shader_arb_update_float_pixel_constants(IWineD3DDevice *iface, UINT start, UINT count)
337 {
338     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
339
340     /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active
341      * context. On a context switch the old context will be fully dirtified */
342     memset(This->activeContext->pshader_const_dirty + start, 1,
343             sizeof(*This->activeContext->pshader_const_dirty) * count);
344     This->highest_dirty_ps_const = max(This->highest_dirty_ps_const, start + count + 1);
345 }
346
347 static DWORD *local_const_mapping(IWineD3DBaseShaderImpl *This)
348 {
349     DWORD *ret;
350     DWORD idx = 0;
351     const local_constant *lconst;
352
353     if(This->baseShader.load_local_constsF || list_empty(&This->baseShader.constantsF)) return NULL;
354
355     ret = HeapAlloc(GetProcessHeap(), 0, sizeof(DWORD) * This->baseShader.limits.temporary);
356     if(!ret) {
357         ERR("Out of memory\n");
358         return NULL;
359     }
360
361     LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
362         ret[lconst->idx] = idx++;
363     }
364     return ret;
365 }
366
/* Generate the variable & register declarations for the ARB_vertex_program output target.
 *
 * Writes to the buffer, in this order: TEMP declarations for the used
 * temporaries, ADDRESS declarations for the used address registers, TEMP
 * declarations for texcoord registers (pixel shaders up to 1.3 only), PARAMs
 * for immediate constants mapped to program.local (only if lconst_map is not
 * NULL), PARAMs for the program.env constants, and PARAMs for the bump
 * environment matrix / luminance parameters.
 *
 * Side effect: for every bump-mapped stage the bumpenvmatconst / luminanceconst
 * arrays and numbumpenvmatconsts of the shader (accessed as a pixel shader) are
 * updated with the program.local slots that were handed out.
 */
static void shader_generate_arb_declarations(IWineD3DBaseShader *iface, const shader_reg_maps *reg_maps,
        SHADER_BUFFER *buffer, const WineD3D_GL_Info *gl_info, DWORD *lconst_map)
{
    IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
    /* next_local is the next free program.local slot */
    DWORD i, cur, next_local = 0;
    char pshader = shader_is_pshader_version(reg_maps->shader_version.type);
    unsigned max_constantsF;
    const local_constant *lconst;

    /* In pixel shaders, all private constants are program local, we don't need anything
     * from program.env. Thus we can advertise the full set of constants in pixel shaders.
     * If we need a private constant the GL implementation will squeeze it in somewhere
     *
     * With vertex shaders we need the posFixup and on some GL implementations 4 helper
     * immediate values. The posFixup is loaded using program.env for now, so always
     * subtract one from the number of constants. If the shader uses indirect addressing,
     * account for the helper const too because we have to declare all available d3d constants
     * and don't know which are actually used.
     */
    if(pshader) {
        max_constantsF = GL_LIMITS(pshader_constantsF);
    } else {
        if(This->baseShader.reg_maps.usesrelconstF) {
            max_constantsF = GL_LIMITS(vshader_constantsF) - reserved_vs_const(iface, gl_info);
        } else {
            max_constantsF = GL_LIMITS(vshader_constantsF) - 1;
        }
    }

    /* Only temporaries that the register map marks as used are declared */
    for(i = 0; i < This->baseShader.limits.temporary; i++) {
        if (reg_maps->temporary[i])
            shader_addline(buffer, "TEMP R%u;\n", i);
    }

    for (i = 0; i < This->baseShader.limits.address; i++) {
        if (reg_maps->address[i])
            shader_addline(buffer, "ADDRESS A%d;\n", i);
    }

    /* Pixel shaders up to version 1.3 get their used texcoord registers declared as TEMPs */
    if(pshader && reg_maps->shader_version.major == 1 && reg_maps->shader_version.minor <= 3) {
        for(i = 0; i < This->baseShader.limits.texcoord; i++) {
            if (reg_maps->texcoord[i] && pshader)
                shader_addline(buffer,"TEMP T%u;\n", i);
        }
    }

    /* Load local constants using the program-local space,
     * this avoids reloading them each time the shader is used
     */
    if(lconst_map) {
        LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
            shader_addline(buffer, "PARAM C%u = program.local[%u];\n", lconst->idx,
                           lconst_map[lconst->idx]);
            /* Keep next_local above every slot used by the mapping */
            next_local = max(next_local, lconst_map[lconst->idx] + 1);
        }
    }

    /* we use the array-based constants array if the local constants are marked for loading,
     * because then we use indirect addressing, or when the local constant list is empty,
     * because then we don't know if we're using indirect addressing or not. If we're hardcoding
     * local constants do not declare the loaded constants as an array because ARB compilers usually
     * do not optimize unused constants away
     */
    if(This->baseShader.reg_maps.usesrelconstF) {
        /* Need to PARAM the environment parameters (constants) so we can use relative addressing */
        shader_addline(buffer, "PARAM C[%d] = { program.env[0..%d] };\n",
                    max_constantsF, max_constantsF - 1);
    } else {
        for(i = 0; i < max_constantsF; i++) {
            DWORD idx, mask;
            /* reg_maps.constf is a bitmap with 32 constants per DWORD */
            idx = i >> 5;
            mask = 1 << (i & 0x1f);
            if(!shader_constant_is_local(This, i) && (This->baseShader.reg_maps.constf[idx] & mask)) {
                shader_addline(buffer, "PARAM C%d = program.env[%d];\n",i, i);
            }
        }
    }

    /* NOTE(review): This is cast to a pixel shader without checking pshader;
     * presumably reg_maps->bumpmat is never set for vertex shaders - confirm. */
    for(i = 0; i < (sizeof(reg_maps->bumpmat) / sizeof(reg_maps->bumpmat[0])); i++) {
        IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) This;
        if(!reg_maps->bumpmat[i]) continue;

        cur = ps->numbumpenvmatconsts;
        ps->bumpenvmatconst[cur].const_num = -1;
        ps->bumpenvmatconst[cur].texunit = i;
        ps->luminanceconst[cur].const_num = -1;
        ps->luminanceconst[cur].texunit = i;

        /* We can fit the constants into the constant limit for sure because texbem, texbeml, bem and beml are only supported
         * in 1.x shaders, and GL_ARB_fragment_program has a constant limit of 24 constants. So in the worst case we're loading
         * 8 shader constants, 8 bump matrices and 8 luminance parameters and are perfectly fine. (No NP2 fixup on bumpmapped
         * textures due to conditional NP2 restrictions)
         *
         * Use local constants to load the bump env parameters, not program.env. This avoids collisions with d3d constants of
         * shaders in newer shader models. Since the bump env parameters have to share their space with NP2 fixup constants,
         * their location is shader dependent anyway and they cannot be loaded globally.
         */
        ps->bumpenvmatconst[cur].const_num = next_local++;
        shader_addline(buffer, "PARAM bumpenvmat%d = program.local[%d];\n",
                       i, ps->bumpenvmatconst[cur].const_num);
        ps->numbumpenvmatconsts = cur + 1;

        /* The luminance constant stays at -1 unless the stage uses luminance parameters */
        if(!reg_maps->luminanceparams[i]) continue;

        ((IWineD3DPixelShaderImpl *)This)->luminanceconst[cur].const_num = next_local++;
        shader_addline(buffer, "PARAM luminance%d = program.local[%d];\n",
                        i, ps->luminanceconst[cur].const_num);
    }

}
478
/* Operand names indexed by a 4 bit shift value. Only the shifts x2 .. x16
 * (1 .. 4) and d16 .. d2 (12 .. 15) have a real entry; every other index
 * holds the placeholder "dummy". */
static const char * const shift_tab[] =
{
    /*  0: none */
    "dummy",
    /*  1 -  4: x2, x4, x8, x16 */
    "coefmul.x", "coefmul.y", "coefmul.z", "coefmul.w",
    /*  5 -  7: x32, x64, x128 */
    "dummy", "dummy", "dummy",
    /*  8 - 11: d256, d128, d64, d32 */
    "dummy", "dummy", "dummy", "dummy",
    /* 12 - 15: d16, d8, d4, d2 */
    "coefdiv.w", "coefdiv.z", "coefdiv.y", "coefdiv.x"
};
497
498 static void shader_arb_get_write_mask(const struct wined3d_shader_instruction *ins,
499         const struct wined3d_shader_dst_param *dst, char *write_mask)
500 {
501     char *ptr = write_mask;
502
503     if (dst->write_mask != WINED3DSP_WRITEMASK_ALL)
504     {
505         *ptr++ = '.';
506         if (dst->write_mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x';
507         if (dst->write_mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y';
508         if (dst->write_mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z';
509         if (dst->write_mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w';
510     }
511
512     *ptr = '\0';
513 }
514
515 static void shader_arb_get_swizzle(const struct wined3d_shader_src_param *param, BOOL fixup, char *swizzle_str)
516 {
517     /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
518      * but addressed as "rgba". To fix this we need to swap the register's x
519      * and z components. */
520     const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
521     char *ptr = swizzle_str;
522
523     /* swizzle bits fields: wwzzyyxx */
524     DWORD swizzle = param->swizzle;
525     DWORD swizzle_x = swizzle & 0x03;
526     DWORD swizzle_y = (swizzle >> 2) & 0x03;
527     DWORD swizzle_z = (swizzle >> 4) & 0x03;
528     DWORD swizzle_w = (swizzle >> 6) & 0x03;
529
530     /* If the swizzle is the default swizzle (ie, "xyzw"), we don't need to
531      * generate a swizzle string. Unless we need to our own swizzling. */
532     if (swizzle != WINED3DSP_NOSWIZZLE || fixup)
533     {
534         *ptr++ = '.';
535         if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) {
536             *ptr++ = swizzle_chars[swizzle_x];
537         } else {
538             *ptr++ = swizzle_chars[swizzle_x];
539             *ptr++ = swizzle_chars[swizzle_y];
540             *ptr++ = swizzle_chars[swizzle_z];
541             *ptr++ = swizzle_chars[swizzle_w];
542         }
543     }
544
545     *ptr = '\0';
546 }
547
548 static void shader_arb_request_a0(const struct wined3d_shader_instruction *ins, const char *src)
549 {
550     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
551     SHADER_BUFFER *buffer = ins->ctx->buffer;
552
553     if(strcmp(priv->addr_reg, src) == 0) return;
554
555     strcpy(priv->addr_reg, src);
556     shader_addline(buffer, "ARL A0.x, %s;\n", src);
557 }
558
/* Forward declaration: shader_arb_get_register_name() below calls this
 * function to resolve a relative addressing source. */
static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins,
        const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr);
561
/* Writes the ARB program name of a D3D register to register_name.
 *
 * *is_color is set to TRUE only for a vertex shader input whose bit is set in
 * the swizzle_map of the current vertex shader compile args, FALSE otherwise.
 * register_name is written with sprintf/strcpy without a length check, the
 * caller has to supply a sufficiently large buffer.
 *
 * Side effect: a relatively addressed constant in a vertex shader of version
 * 2 or higher on the plain ARB target loads A0.x via shader_arb_request_a0(),
 * which may add an ARL instruction to the shader buffer. */
static void shader_arb_get_register_name(const struct wined3d_shader_instruction *ins,
        const struct wined3d_shader_register *reg, char *register_name, BOOL *is_color)
{
    /* oPos, oFog and oPts in D3D */
    static const char * const rastout_reg_names[] = {"TMP_OUT", "result.fogcoord", "result.pointsize"};
    IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
    BOOL pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type);
    struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;

    *is_color = FALSE;

    switch (reg->type)
    {
        case WINED3DSPR_TEMP:
            sprintf(register_name, "R%u", reg->idx);
            break;

        case WINED3DSPR_INPUT:
            if (pshader)
            {
                /* Input 0 is the primary color, every other index maps to the secondary color */
                if (reg->idx == 0) strcpy(register_name, "fragment.color.primary");
                else strcpy(register_name, "fragment.color.secondary");
            }
            else
            {
                if (ctx->cur_vs_args->super.swizzle_map & (1 << reg->idx)) *is_color = TRUE;
                sprintf(register_name, "vertex.attrib[%u]", reg->idx);
            }
            break;

        case WINED3DSPR_CONST:
            if (!pshader && reg->rel_addr)
            {
                char rel_reg[50];
                UINT rel_offset = ((IWineD3DVertexShaderImpl *)This)->rel_offset;
                if(This->baseShader.reg_maps.shader_version.major < 2) {
                    sprintf(rel_reg, "A0.x");
                } else {
                    shader_arb_get_src_param(ins, reg->rel_addr, 0, rel_reg);
                    /* The plain ARB target addresses through A0.x only, so the
                     * relative source is loaded into it first */
                    if(ctx->target_version == ARB) {
                        shader_arb_request_a0(ins, rel_reg);
                        sprintf(rel_reg, "A0.x");
                    }
                }
                /* The constant index is emitted relative to the shader's rel_offset */
                if (reg->idx >= rel_offset)
                    sprintf(register_name, "C[%s + %u]", rel_reg, reg->idx - rel_offset);
                else
                    sprintf(register_name, "C[%s - %u]", rel_reg, -reg->idx + rel_offset);
            }
            else
            {
                /* Constants are declared as an array only if the shader uses relative addressing */
                if (This->baseShader.reg_maps.usesrelconstF)
                    sprintf(register_name, "C[%u]", reg->idx);
                else
                    sprintf(register_name, "C%u", reg->idx);
            }
            break;

        case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
            if (pshader) {
                if(This->baseShader.reg_maps.shader_version.major == 1 &&
                   This->baseShader.reg_maps.shader_version.minor <= 3) {
                    /* In ps <= 1.3, Tx is a temporary register as destination to all instructions,
                     * and as source to most instructions. For some instructions it is the texcoord
                     * input. Those instructions know about the special use
                     */
                    sprintf(register_name, "T%u", reg->idx);
                } else {
                    /* in ps 1.4 and 2.x Tx is always a (read-only) varying */
                    sprintf(register_name, "fragment.texcoord[%u]", reg->idx);
                }
            }
            else
            {
                /* In vertex shaders this register type is the address register */
                if(This->baseShader.reg_maps.shader_version.major == 1 || ctx->target_version >= NV2)
                {
                    sprintf(register_name, "A%u", reg->idx);
                }
                else
                {
                    sprintf(register_name, "A%u_SHADOW", reg->idx);
                }
            }
            break;

        case WINED3DSPR_COLOROUT:
            if (reg->idx == 0)
            {
                /* With sRGB correction the color goes to a temporary instead of the result */
                if(ctx->cur_ps_args->super.srgb_correction)
                {
                    strcpy(register_name, "TMP_COLOR");
                }
                else
                {
                    strcpy(register_name, "result.color");
                }
            }
            else
            {
                /* TODO: See GL_ARB_draw_buffers */
                FIXME("Unsupported write to render target %u\n", reg->idx);
                sprintf(register_name, "unsupported_register");
            }
            break;

        case WINED3DSPR_RASTOUT:
            /* NOTE(review): reg->idx indexes the 3 entry table without a range check */
            sprintf(register_name, "%s", rastout_reg_names[reg->idx]);
            break;

        case WINED3DSPR_DEPTHOUT:
            strcpy(register_name, "result.depth");
            break;

        case WINED3DSPR_ATTROUT:
            if (pshader) sprintf(register_name, "oD[%u]", reg->idx);
            else if (reg->idx == 0) strcpy(register_name, "result.color.primary");
            else strcpy(register_name, "result.color.secondary");
            break;

        case WINED3DSPR_TEXCRDOUT:
            if (pshader) sprintf(register_name, "oT[%u]", reg->idx);
            else sprintf(register_name, "result.texcoord[%u]", reg->idx);
            break;

        default:
            FIXME("Unhandled register type %#x[%u]\n", reg->type, reg->idx);
            sprintf(register_name, "unrecognized_register[%u]", reg->idx);
            break;
    }
}
692
693 static void shader_arb_get_dst_param(const struct wined3d_shader_instruction *ins,
694         const struct wined3d_shader_dst_param *wined3d_dst, char *str)
695 {
696     char register_name[255];
697     char write_mask[6];
698     BOOL is_color;
699
700     shader_arb_get_register_name(ins, &wined3d_dst->reg, register_name, &is_color);
701     strcpy(str, register_name);
702
703     shader_arb_get_write_mask(ins, wined3d_dst, write_mask);
704     strcat(str, write_mask);
705 }
706
707 static const char *shader_arb_get_fixup_swizzle(enum fixup_channel_source channel_source)
708 {
709     switch(channel_source)
710     {
711         case CHANNEL_SOURCE_ZERO: return "0";
712         case CHANNEL_SOURCE_ONE: return "1";
713         case CHANNEL_SOURCE_X: return "x";
714         case CHANNEL_SOURCE_Y: return "y";
715         case CHANNEL_SOURCE_Z: return "z";
716         case CHANNEL_SOURCE_W: return "w";
717         default:
718             FIXME("Unhandled channel source %#x\n", channel_source);
719             return "undefined";
720     }
721 }
722
723 static void gen_color_correction(SHADER_BUFFER *buffer, const char *reg, DWORD dst_mask,
724                                  const char *one, const char *two, struct color_fixup_desc fixup)
725 {
726     DWORD mask;
727
728     if (is_yuv_fixup(fixup))
729     {
730         enum yuv_fixup yuv_fixup = get_yuv_fixup(fixup);
731         FIXME("YUV fixup (%#x) not supported\n", yuv_fixup);
732         return;
733     }
734
735     mask = 0;
736     if (fixup.x_source != CHANNEL_SOURCE_X) mask |= WINED3DSP_WRITEMASK_0;
737     if (fixup.y_source != CHANNEL_SOURCE_Y) mask |= WINED3DSP_WRITEMASK_1;
738     if (fixup.z_source != CHANNEL_SOURCE_Z) mask |= WINED3DSP_WRITEMASK_2;
739     if (fixup.w_source != CHANNEL_SOURCE_W) mask |= WINED3DSP_WRITEMASK_3;
740     mask &= dst_mask;
741
742     if (mask)
743     {
744         shader_addline(buffer, "SWZ %s, %s, %s, %s, %s, %s;\n", reg, reg,
745                 shader_arb_get_fixup_swizzle(fixup.x_source), shader_arb_get_fixup_swizzle(fixup.y_source),
746                 shader_arb_get_fixup_swizzle(fixup.z_source), shader_arb_get_fixup_swizzle(fixup.w_source));
747     }
748
749     mask = 0;
750     if (fixup.x_sign_fixup) mask |= WINED3DSP_WRITEMASK_0;
751     if (fixup.y_sign_fixup) mask |= WINED3DSP_WRITEMASK_1;
752     if (fixup.z_sign_fixup) mask |= WINED3DSP_WRITEMASK_2;
753     if (fixup.w_sign_fixup) mask |= WINED3DSP_WRITEMASK_3;
754     mask &= dst_mask;
755
756     if (mask)
757     {
758         char reg_mask[6];
759         char *ptr = reg_mask;
760
761         if (mask != WINED3DSP_WRITEMASK_ALL)
762         {
763             *ptr++ = '.';
764             if (mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x';
765             if (mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y';
766             if (mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z';
767             if (mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w';
768         }
769         *ptr = '\0';
770
771         shader_addline(buffer, "MAD %s%s, %s, %s, -%s;\n", reg, reg_mask, reg, two, one);
772     }
773 }
774
775 static void shader_hw_sample(const struct wined3d_shader_instruction *ins, DWORD sampler_idx,
776         const char *dst_str, const char *coord_reg, BOOL projected, BOOL bias)
777 {
778     SHADER_BUFFER *buffer = ins->ctx->buffer;
779     DWORD sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
780     const char *tex_type;
781     IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
782     IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) This->baseShader.device;
783     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
784
785     switch(sampler_type) {
786         case WINED3DSTT_1D:
787             tex_type = "1D";
788             break;
789
790         case WINED3DSTT_2D:
791             if(device->stateBlock->textures[sampler_idx] &&
792                IWineD3DBaseTexture_GetTextureDimensions(device->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
793                 tex_type = "RECT";
794             } else {
795                 tex_type = "2D";
796             }
797             if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
798             {
799                 if(priv->cur_ps_args->super.np2_fixup & (1 << sampler_idx))
800                 {
801                     FIXME("NP2 texcoord fixup is currently not implemented in ARB mode (use GLSL instead).\n");
802                 }
803             }
804             break;
805
806         case WINED3DSTT_VOLUME:
807             tex_type = "3D";
808             break;
809
810         case WINED3DSTT_CUBE:
811             tex_type = "CUBE";
812             break;
813
814         default:
815             ERR("Unexpected texture type %d\n", sampler_type);
816             tex_type = "";
817     }
818
819     if (bias) {
820         /* Shouldn't be possible, but let's check for it */
821         if(projected) FIXME("Biased and Projected texture sampling\n");
822         /* TXB takes the 4th component of the source vector automatically, as d3d. Nothing more to do */
823         shader_addline(buffer, "TXB %s, %s, texture[%u], %s;\n", dst_str, coord_reg, sampler_idx, tex_type);
824     } else if (projected) {
825         shader_addline(buffer, "TXP %s, %s, texture[%u], %s;\n", dst_str, coord_reg, sampler_idx, tex_type);
826     } else {
827         shader_addline(buffer, "TEX %s, %s, texture[%u], %s;\n", dst_str, coord_reg, sampler_idx, tex_type);
828     }
829
830     if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
831     {
832         gen_color_correction(buffer, dst_str, ins->dst[0].write_mask,
833                 "one", "coefmul.x", priv->cur_ps_args->super.color_fixup[sampler_idx]);
834     }
835 }
836
837 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins,
838         const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr)
839 {
840     /* Generate a line that does the input modifier computation and return the input register to use */
841     BOOL is_color = FALSE;
842     char regstr[256];
843     char swzstr[20];
844     int insert_line;
845     SHADER_BUFFER *buffer = ins->ctx->buffer;
846     struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
847
848     /* Assume a new line will be added */
849     insert_line = 1;
850
851     /* Get register name */
852     shader_arb_get_register_name(ins, &src->reg, regstr, &is_color);
853     shader_arb_get_swizzle(src, is_color, swzstr);
854
855     switch (src->modifiers)
856     {
857     case WINED3DSPSM_NONE:
858         sprintf(outregstr, "%s%s", regstr, swzstr);
859         insert_line = 0;
860         break;
861     case WINED3DSPSM_NEG:
862         sprintf(outregstr, "-%s%s", regstr, swzstr);
863         insert_line = 0;
864         break;
865     case WINED3DSPSM_BIAS:
866         shader_addline(buffer, "ADD T%c, %s, -coefdiv.x;\n", 'A' + tmpreg, regstr);
867         break;
868     case WINED3DSPSM_BIASNEG:
869         shader_addline(buffer, "ADD T%c, -%s, coefdiv.x;\n", 'A' + tmpreg, regstr);
870         break;
871     case WINED3DSPSM_SIGN:
872         shader_addline(buffer, "MAD T%c, %s, coefmul.x, -one.x;\n", 'A' + tmpreg, regstr);
873         break;
874     case WINED3DSPSM_SIGNNEG:
875         shader_addline(buffer, "MAD T%c, %s, -coefmul.x, one.x;\n", 'A' + tmpreg, regstr);
876         break;
877     case WINED3DSPSM_COMP:
878         shader_addline(buffer, "SUB T%c, one.x, %s;\n", 'A' + tmpreg, regstr);
879         break;
880     case WINED3DSPSM_X2:
881         shader_addline(buffer, "ADD T%c, %s, %s;\n", 'A' + tmpreg, regstr, regstr);
882         break;
883     case WINED3DSPSM_X2NEG:
884         shader_addline(buffer, "ADD T%c, -%s, -%s;\n", 'A' + tmpreg, regstr, regstr);
885         break;
886     case WINED3DSPSM_DZ:
887         shader_addline(buffer, "RCP T%c, %s.z;\n", 'A' + tmpreg, regstr);
888         shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg);
889         break;
890     case WINED3DSPSM_DW:
891         shader_addline(buffer, "RCP T%c, %s.w;\n", 'A' + tmpreg, regstr);
892         shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg);
893         break;
894     case WINED3DSPSM_ABS:
895         if(ctx->target_version >= NV2) {
896             sprintf(outregstr, "|%s%s|", regstr, swzstr);
897             insert_line = 0;
898         } else {
899             shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr);
900         }
901         break;
902     case WINED3DSPSM_ABSNEG:
903         if(ctx->target_version >= NV2) {
904             sprintf(outregstr, "-|%s%s|", regstr, swzstr);
905         } else {
906             shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr);
907             sprintf(outregstr, "-T%c%s", 'A' + tmpreg, swzstr);
908         }
909         insert_line = 0;
910         break;
911     default:
912         sprintf(outregstr, "%s%s", regstr, swzstr);
913         insert_line = 0;
914     }
915
916     /* Return modified or original register, with swizzle */
917     if (insert_line)
918         sprintf(outregstr, "T%c%s", 'A' + tmpreg, swzstr);
919 }
920
921 static const char *shader_arb_get_modifier(const struct wined3d_shader_instruction *ins)
922 {
923     DWORD mod;
924     const char *ret = "";
925     if (!ins->dst_count) return "";
926
927     mod = ins->dst[0].modifiers;
928     if(mod & WINED3DSPDM_SATURATE) {
929         ret = "_SAT";
930         mod &= ~WINED3DSPDM_SATURATE;
931     }
932     if(mod & WINED3DSPDM_PARTIALPRECISION) {
933         FIXME("Unhandled modifier WINED3DSPDM_PARTIALPRECISION\n");
934         mod &= ~WINED3DSPDM_PARTIALPRECISION;
935     }
936     if(mod & WINED3DSPDM_MSAMPCENTROID) {
937         FIXME("Unhandled modifier WINED3DSPDM_MSAMPCENTROID\n");
938         mod &= ~WINED3DSPDM_MSAMPCENTROID;
939     }
940     if(mod) {
941         FIXME("Unknown modifiers 0x%08x\n", mod);
942     }
943     return ret;
944 }
945
946 static void pshader_hw_bem(const struct wined3d_shader_instruction *ins)
947 {
948     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
949     SHADER_BUFFER *buffer = ins->ctx->buffer;
950     char dst_name[50];
951     char src_name[2][50];
952     DWORD sampler_code = dst->reg.idx;
953
954     shader_arb_get_dst_param(ins, dst, dst_name);
955
956     /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
957      *
958      * Keep in mind that src_name[1] can be "TB" and src_name[0] can be "TA" because modifiers like _x2 are valid
959      * with bem. So delay loading the first parameter until after the perturbation calculation which needs two
960      * temps is done.
961      */
962     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
963     shader_addline(buffer, "SWZ TA, bumpenvmat%d, x, z, 0, 0;\n", sampler_code);
964     shader_addline(buffer, "DP3 TC.r, TA, %s;\n", src_name[1]);
965     shader_addline(buffer, "SWZ TA, bumpenvmat%d, y, w, 0, 0;\n", sampler_code);
966     shader_addline(buffer, "DP3 TC.g, TA, %s;\n", src_name[1]);
967
968     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
969     shader_addline(buffer, "ADD %s, %s, TC;\n", dst_name, src_name[0]);
970 }
971
972 static void pshader_hw_cnd(const struct wined3d_shader_instruction *ins)
973 {
974     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
975     SHADER_BUFFER *buffer = ins->ctx->buffer;
976     char dst_name[50];
977     char src_name[3][50];
978     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
979             ins->ctx->reg_maps->shader_version.minor);
980
981     shader_arb_get_dst_param(ins, dst, dst_name);
982     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
983
984     /* The coissue flag changes the semantic of the cnd instruction in <= 1.3 shaders */
985     if (shader_version <= WINED3D_SHADER_VERSION(1, 3) && ins->coissue)
986     {
987         shader_addline(buffer, "MOV%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[1]);
988     } else {
989         shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
990         shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
991         shader_addline(buffer, "ADD TA, -%s, coefdiv.x;\n", src_name[0]);
992         shader_addline(buffer, "CMP%s %s, TA, %s, %s;\n",
993                        shader_arb_get_modifier(ins), dst_name, src_name[1], src_name[2]);
994     }
995 }
996
997 static void pshader_hw_cmp(const struct wined3d_shader_instruction *ins)
998 {
999     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1000     SHADER_BUFFER *buffer = ins->ctx->buffer;
1001     char dst_name[50];
1002     char src_name[3][50];
1003
1004     shader_arb_get_dst_param(ins, dst, dst_name);
1005
1006     /* Generate input register names (with modifiers) */
1007     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
1008     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1009     shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
1010
1011     shader_addline(buffer, "CMP%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), dst_name,
1012                    src_name[0], src_name[2], src_name[1]);
1013 }
1014
1015 /** Process the WINED3DSIO_DP2ADD instruction in ARB.
1016  * dst = dot2(src0, src1) + src2 */
1017 static void pshader_hw_dp2add(const struct wined3d_shader_instruction *ins)
1018 {
1019     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1020     SHADER_BUFFER *buffer = ins->ctx->buffer;
1021     char dst_name[50];
1022     char src_name[3][50];
1023
1024     shader_arb_get_dst_param(ins, dst, dst_name);
1025     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
1026     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1027     shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
1028
1029     /* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2]
1030      * src_name[0] can be TA, but TA is a private temp for modifiers, so it is save to overwrite
1031      */
1032     shader_addline(buffer, "MOV TA, %s;\n", src_name[0]);
1033     shader_addline(buffer, "MOV TA.z, 0.0;\n");
1034     shader_addline(buffer, "DP3 TA, TA, %s;\n", src_name[1]);
1035     shader_addline(buffer, "ADD%s %s, TA, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[2]);
1036 }
1037
1038 /* Map the opcode 1-to-1 to the GL code */
1039 static void shader_hw_map2gl(const struct wined3d_shader_instruction *ins)
1040 {
1041     SHADER_BUFFER *buffer = ins->ctx->buffer;
1042     const char *instruction;
1043     char arguments[256], dst_str[50];
1044     unsigned int i;
1045     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1046
1047     switch (ins->handler_idx)
1048     {
1049         case WINED3DSIH_ABS: instruction = "ABS"; break;
1050         case WINED3DSIH_ADD: instruction = "ADD"; break;
1051         case WINED3DSIH_CRS: instruction = "XPD"; break;
1052         case WINED3DSIH_DP3: instruction = "DP3"; break;
1053         case WINED3DSIH_DP4: instruction = "DP4"; break;
1054         case WINED3DSIH_DST: instruction = "DST"; break;
1055         case WINED3DSIH_EXP: instruction = "EX2"; break;
1056         case WINED3DSIH_EXPP: instruction = "EXP"; break;
1057         case WINED3DSIH_FRC: instruction = "FRC"; break;
1058         case WINED3DSIH_LIT: instruction = "LIT"; break;
1059         case WINED3DSIH_LOG: instruction = "LG2"; break;
1060         case WINED3DSIH_LOGP: instruction = "LOG"; break;
1061         case WINED3DSIH_LRP: instruction = "LRP"; break;
1062         case WINED3DSIH_MAD: instruction = "MAD"; break;
1063         case WINED3DSIH_MAX: instruction = "MAX"; break;
1064         case WINED3DSIH_MIN: instruction = "MIN"; break;
1065         case WINED3DSIH_MOV: instruction = "MOV"; break;
1066         case WINED3DSIH_MUL: instruction = "MUL"; break;
1067         case WINED3DSIH_POW: instruction = "POW"; break;
1068         case WINED3DSIH_SGE: instruction = "SGE"; break;
1069         case WINED3DSIH_SLT: instruction = "SLT"; break;
1070         case WINED3DSIH_SUB: instruction = "SUB"; break;
1071         case WINED3DSIH_MOVA:instruction = "ARR"; break;
1072         case WINED3DSIH_SGN: instruction = "SSG"; break;
1073         case WINED3DSIH_DSX: instruction = "DDX"; break;
1074         default: instruction = "";
1075             FIXME("Unhandled opcode %#x\n", ins->handler_idx);
1076             break;
1077     }
1078
1079     /* Note that shader_arb_add_dst_param() adds spaces. */
1080     arguments[0] = '\0';
1081     shader_arb_get_dst_param(ins, dst, dst_str);
1082     for (i = 0; i < ins->src_count; ++i)
1083     {
1084         char operand[100];
1085         strcat(arguments, ", ");
1086         shader_arb_get_src_param(ins, &ins->src[i], i, operand);
1087         strcat(arguments, operand);
1088     }
1089     shader_addline(buffer, "%s%s %s%s;\n", instruction, shader_arb_get_modifier(ins), dst_str, arguments);
1090 }
1091
1092 static void shader_hw_nop(const struct wined3d_shader_instruction *ins)
1093 {
1094     SHADER_BUFFER *buffer = ins->ctx->buffer;
1095     shader_addline(buffer, "NOP;\n");
1096 }
1097
1098 static void shader_hw_mov(const struct wined3d_shader_instruction *ins)
1099 {
1100     IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
1101
1102     SHADER_BUFFER *buffer = ins->ctx->buffer;
1103     char src0_param[256];
1104
1105     if(ins->handler_idx == WINED3DSIH_MOVA) {
1106         struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1107         struct wined3d_shader_src_param tmp_src = ins->src[0];
1108         char write_mask[6];
1109
1110         if(ctx->target_version >= NV2) {
1111             shader_hw_map2gl(ins);
1112             return;
1113         }
1114         tmp_src.swizzle = (tmp_src.swizzle & 0x3) * 0x55;
1115         shader_arb_get_src_param(ins, &tmp_src, 0, src0_param);
1116         shader_arb_get_write_mask(ins, &ins->dst[0], write_mask);
1117
1118         /* This implements the mova formula used in GLSL. The first two instructions
1119          * prepare the sign() part. Note that it is fine to have my_sign(0.0) = 1.0
1120          * in this case:
1121          * mova A0.x, 0.0
1122          *
1123          * A0.x = arl(floor(abs(0.0) + 0.5) * 1.0) = floor(0.5) = 0.0 since arl does a floor
1124          *
1125          * The ARL is performed when A0 is used - the requested component is read from A0_SHADOW into
1126          * A0.x. We can use the overwritten component of A0_shadow as temporary storage for the sign.
1127          */
1128         shader_addline(buffer, "SGE A0_SHADOW%s, %s, mova_const.y;\n", write_mask, src0_param);
1129         shader_addline(buffer, "MAD A0_SHADOW%s, A0_SHADOW, mova_const.z, -mova_const.w;\n", write_mask);
1130
1131         shader_addline(buffer, "ABS TA%s, %s;\n", write_mask, src0_param);
1132         shader_addline(buffer, "ADD TA%s, TA, mova_const.x;\n", write_mask);
1133         shader_addline(buffer, "FLR TA%s, TA;\n", write_mask);
1134         shader_addline(buffer, "MUL A0_SHADOW%s, TA, A0_SHADOW;\n", write_mask);
1135
1136         ((struct shader_arb_ctx_priv *)ins->ctx->backend_data)->addr_reg[0] = '\0';
1137     } else if (ins->ctx->reg_maps->shader_version.major == 1
1138           && !shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)
1139           && ins->dst[0].reg.type == WINED3DSPR_ADDR)
1140     {
1141         src0_param[0] = '\0';
1142         if (((IWineD3DVertexShaderImpl *)shader)->rel_offset)
1143         {
1144             shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param);
1145             shader_addline(buffer, "ADD TA.x, %s, helper_const.z;\n", src0_param);
1146             shader_addline(buffer, "ARL A0.x, TA.x;\n");
1147         }
1148         else
1149         {
1150             /* Apple's ARB_vertex_program implementation does not accept an ARL source argument
1151              * with more than one component. Thus replicate the first source argument over all
1152              * 4 components. For example, .xyzw -> .x (or better: .xxxx), .zwxy -> .z, etc) */
1153             struct wined3d_shader_src_param tmp_src = ins->src[0];
1154             tmp_src.swizzle = (tmp_src.swizzle & 0x3) * 0x55;
1155             shader_arb_get_src_param(ins, &tmp_src, 0, src0_param);
1156             shader_addline(buffer, "ARL A0.x, %s;\n", src0_param);
1157         }
1158     }
1159     else
1160     {
1161         shader_hw_map2gl(ins);
1162     }
1163 }
1164
1165 static void pshader_hw_texkill(const struct wined3d_shader_instruction *ins)
1166 {
1167     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1168     SHADER_BUFFER *buffer = ins->ctx->buffer;
1169     char reg_dest[40];
1170
1171     /* No swizzles are allowed in d3d's texkill. PS 1.x ignores the 4th component as documented,
1172      * but >= 2.0 honors it(undocumented, but tested by the d3d9 testsuit)
1173      */
1174     shader_arb_get_dst_param(ins, dst, reg_dest);
1175
1176     if (ins->ctx->reg_maps->shader_version.major >= 2)
1177     {
1178         /* The arb backend doesn't claim ps 2.0 support, but try to eat what the app feeds to us */
1179         shader_arb_get_dst_param(ins, dst, reg_dest);
1180         shader_addline(buffer, "KIL %s;\n", reg_dest);
1181     } else {
1182         /* ARB fp doesn't like swizzles on the parameter of the KIL instruction. To mask the 4th component,
1183          * copy the register into our general purpose TMP variable, overwrite .w and pass TMP to KIL
1184          *
1185          * ps_1_3 shaders use the texcoord incarnation of the Tx register. ps_1_4 shaders can use the same,
1186          * or pass in any temporary register(in shader phase 2)
1187          */
1188         if(ins->ctx->reg_maps->shader_version.minor <= 3) {
1189             sprintf(reg_dest, "fragment.texcoord[%u]", dst->reg.idx);
1190         } else {
1191             shader_arb_get_dst_param(ins, dst, reg_dest);
1192         }
1193         shader_addline(buffer, "SWZ TA, %s, x, y, z, 1;\n", reg_dest);
1194         shader_addline(buffer, "KIL TA;\n");
1195     }
1196 }
1197
1198 static void pshader_hw_tex(const struct wined3d_shader_instruction *ins)
1199 {
1200     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
1201     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
1202     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1203     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
1204             ins->ctx->reg_maps->shader_version.minor);
1205     BOOL projected = FALSE, bias = FALSE;
1206     struct wined3d_shader_src_param src;
1207
1208     char reg_dest[40];
1209     char reg_coord[40];
1210     DWORD reg_sampler_code;
1211
1212     /* All versions have a destination register */
1213     shader_arb_get_dst_param(ins, dst, reg_dest);
1214
1215     /* 1.0-1.4: Use destination register number as texture code.
1216        2.0+: Use provided sampler number as texure code. */
1217     if (shader_version < WINED3D_SHADER_VERSION(2,0))
1218         reg_sampler_code = dst->reg.idx;
1219     else
1220         reg_sampler_code = ins->src[1].reg.idx;
1221
1222     /* 1.0-1.3: Use the texcoord varying.
1223        1.4+: Use provided coordinate source register. */
1224     if (shader_version < WINED3D_SHADER_VERSION(1,4))
1225         sprintf(reg_coord, "fragment.texcoord[%u]", reg_sampler_code);
1226     else {
1227         /* TEX is the only instruction that can handle DW and DZ natively */
1228         src = ins->src[0];
1229         if(src.modifiers == WINED3DSPSM_DW) src.modifiers = WINED3DSPSM_NONE;
1230         if(src.modifiers == WINED3DSPSM_DZ) src.modifiers = WINED3DSPSM_NONE;
1231         shader_arb_get_src_param(ins, &src, 0, reg_coord);
1232     }
1233
1234     /* projection flag:
1235      * 1.1, 1.2, 1.3: Use WINED3DTSS_TEXTURETRANSFORMFLAGS
1236      * 1.4: Use WINED3DSPSM_DZ or WINED3DSPSM_DW on src[0]
1237      * 2.0+: Use WINED3DSI_TEXLD_PROJECT on the opcode
1238      */
1239     if (shader_version < WINED3D_SHADER_VERSION(1,4))
1240     {
1241         DWORD flags = 0;
1242         if(reg_sampler_code < MAX_TEXTURES) {
1243             flags = deviceImpl->stateBlock->textureState[reg_sampler_code][WINED3DTSS_TEXTURETRANSFORMFLAGS];
1244         }
1245         if (flags & WINED3DTTFF_PROJECTED) {
1246             projected = TRUE;
1247         }
1248     }
1249     else if (shader_version < WINED3D_SHADER_VERSION(2,0))
1250     {
1251         DWORD src_mod = ins->src[0].modifiers;
1252         if (src_mod == WINED3DSPSM_DZ) {
1253             /* TXP cannot handle DZ natively, so move the z coordinate to .w. reg_coord is a read-only
1254              * varying register, so we need a temp reg
1255              */
1256             shader_addline(ins->ctx->buffer, "SWZ TA, %s, x, y, z, z;\n", reg_coord);
1257             strcpy(reg_coord, "TA");
1258             projected = TRUE;
1259         } else if(src_mod == WINED3DSPSM_DW) {
1260             projected = TRUE;
1261         }
1262     } else {
1263         if (ins->flags & WINED3DSI_TEXLD_PROJECT) projected = TRUE;
1264         if (ins->flags & WINED3DSI_TEXLD_BIAS) bias = TRUE;
1265     }
1266     shader_hw_sample(ins, reg_sampler_code, reg_dest, reg_coord, projected, bias);
1267 }
1268
1269 static void pshader_hw_texcoord(const struct wined3d_shader_instruction *ins)
1270 {
1271     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1272     SHADER_BUFFER *buffer = ins->ctx->buffer;
1273     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
1274             ins->ctx->reg_maps->shader_version.minor);
1275     char dst_str[50];
1276
1277     if (shader_version < WINED3D_SHADER_VERSION(1,4))
1278     {
1279         DWORD reg = dst->reg.idx;
1280
1281         shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
1282         shader_addline(buffer, "MOV_SAT %s, fragment.texcoord[%u];\n", dst_str, reg);
1283     } else {
1284         char reg_src[40];
1285
1286         shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src);
1287         shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
1288         shader_addline(buffer, "MOV %s, %s;\n", dst_str, reg_src);
1289    }
1290 }
1291
1292 static void pshader_hw_texreg2ar(const struct wined3d_shader_instruction *ins)
1293 {
1294      SHADER_BUFFER *buffer = ins->ctx->buffer;
1295      IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
1296      IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
1297      DWORD flags;
1298
1299      DWORD reg1 = ins->dst[0].reg.idx;
1300      char dst_str[50];
1301      char src_str[50];
1302
1303      /* Note that texreg2ar treats Tx as a temporary register, not as a varying */
1304      shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
1305      shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
1306      /* Move .x first in case src_str is "TA" */
1307      shader_addline(buffer, "MOV TA.y, %s.x;\n", src_str);
1308      shader_addline(buffer, "MOV TA.x, %s.w;\n", src_str);
1309      flags = reg1 < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg1][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
1310      shader_hw_sample(ins, reg1, dst_str, "TA", flags & WINED3DTTFF_PROJECTED, FALSE);
1311 }
1312
1313 static void pshader_hw_texreg2gb(const struct wined3d_shader_instruction *ins)
1314 {
1315      SHADER_BUFFER *buffer = ins->ctx->buffer;
1316
1317      DWORD reg1 = ins->dst[0].reg.idx;
1318      char dst_str[50];
1319      char src_str[50];
1320
1321      /* Note that texreg2gb treats Tx as a temporary register, not as a varying */
1322      shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
1323      shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
1324      shader_addline(buffer, "MOV TA.x, %s.y;\n", src_str);
1325      shader_addline(buffer, "MOV TA.y, %s.z;\n", src_str);
1326      shader_hw_sample(ins, reg1, dst_str, "TA", FALSE, FALSE);
1327 }
1328
1329 static void pshader_hw_texreg2rgb(const struct wined3d_shader_instruction *ins)
1330 {
1331     DWORD reg1 = ins->dst[0].reg.idx;
1332     char dst_str[50];
1333     char src_str[50];
1334
1335     /* Note that texreg2rg treats Tx as a temporary register, not as a varying */
1336     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
1337     shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
1338     shader_hw_sample(ins, reg1, dst_str, src_str, FALSE, FALSE);
1339 }
1340
1341 static void pshader_hw_texbem(const struct wined3d_shader_instruction *ins)
1342 {
1343     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
1344     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1345     SHADER_BUFFER *buffer = ins->ctx->buffer;
1346     char reg_coord[40], dst_reg[50], src_reg[50];
1347     DWORD reg_dest_code;
1348
1349     /* All versions have a destination register. The Tx where the texture coordinates come
1350      * from is the varying incarnation of the texture register
1351      */
1352     reg_dest_code = dst->reg.idx;
1353     shader_arb_get_dst_param(ins, &ins->dst[0], dst_reg);
1354     shader_arb_get_src_param(ins, &ins->src[0], 0, src_reg);
1355     sprintf(reg_coord, "fragment.texcoord[%u]", reg_dest_code);
1356
1357     /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
1358      * The Tx in which the perturbation map is stored is the tempreg incarnation of the texture register
1359      */
1360     shader_addline(buffer, "SWZ TB, bumpenvmat%d, x, z, 0, 0;\n", reg_dest_code);
1361     shader_addline(buffer, "DP3 TA.x, TB, %s;\n", src_reg);
1362     shader_addline(buffer, "SWZ TB, bumpenvmat%d, y, w, 0, 0;\n", reg_dest_code);
1363     shader_addline(buffer, "DP3 TA.y, TB, %s;\n", src_reg);
1364
1365     /* with projective textures, texbem only divides the static texture coord, not the displacement,
1366      * so we can't let the GL handle this.
1367      */
1368     if (((IWineD3DDeviceImpl*) This->baseShader.device)->stateBlock->textureState[reg_dest_code][WINED3DTSS_TEXTURETRANSFORMFLAGS]
1369             & WINED3DTTFF_PROJECTED) {
1370         shader_addline(buffer, "RCP TB.w, %s.w;\n", reg_coord);
1371         shader_addline(buffer, "MUL TB.xy, %s, TB.w;\n", reg_coord);
1372         shader_addline(buffer, "ADD TA.xy, TA, TB;\n");
1373     } else {
1374         shader_addline(buffer, "ADD TA.xy, TA, %s;\n", reg_coord);
1375     }
1376
1377     shader_hw_sample(ins, reg_dest_code, dst_reg, "TA", FALSE, FALSE);
1378
1379     if (ins->handler_idx == WINED3DSIH_TEXBEML)
1380     {
1381         /* No src swizzles are allowed, so this is ok */
1382         shader_addline(buffer, "MAD TA, %s.z, luminance%d.x, luminance%d.y;\n",
1383                        src_reg, reg_dest_code, reg_dest_code);
1384         shader_addline(buffer, "MUL %s, %s, TA;\n", dst_reg, dst_reg);
1385     }
1386 }
1387
1388 static void pshader_hw_texm3x2pad(const struct wined3d_shader_instruction *ins)
1389 {
1390     DWORD reg = ins->dst[0].reg.idx;
1391     SHADER_BUFFER *buffer = ins->ctx->buffer;
1392     char src0_name[50], dst_name[50];
1393     BOOL is_color;
1394     struct wined3d_shader_register tmp_reg = ins->dst[0].reg;
1395
1396     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
1397     /* The next instruction will be a texm3x2tex or texm3x2depth that writes to the uninitialized
1398      * T<reg+1> register. Use this register to store the calculated vector
1399      */
1400     tmp_reg.idx = reg + 1;
1401     shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color);
1402     shader_addline(buffer, "DP3 %s.x, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name);
1403 }
1404
1405 static void pshader_hw_texm3x2tex(const struct wined3d_shader_instruction *ins)
1406 {
1407     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
1408     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
1409     DWORD flags;
1410     DWORD reg = ins->dst[0].reg.idx;
1411     SHADER_BUFFER *buffer = ins->ctx->buffer;
1412     char dst_str[50];
1413     char src0_name[50];
1414     char dst_reg[50];
1415     BOOL is_color;
1416
1417     /* We know that we're writing to the uninitialized T<reg> register, so use it for temporary storage */
1418     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
1419
1420     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
1421     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
1422     shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
1423     flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
1424     shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED, FALSE);
1425 }
1426
1427 static void pshader_hw_texm3x3pad(const struct wined3d_shader_instruction *ins)
1428 {
1429     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
1430     DWORD reg = ins->dst[0].reg.idx;
1431     SHADER_BUFFER *buffer = ins->ctx->buffer;
1432     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
1433     char src0_name[50], dst_name[50];
1434     struct wined3d_shader_register tmp_reg = ins->dst[0].reg;
1435     BOOL is_color;
1436
1437     /* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with
1438      * incrementing ins->dst[0].register_idx numbers. So the pad instruction already knows the final destination
1439      * register, and this register is uninitialized(otherwise the assembler complains that it is 'redeclared')
1440      */
1441     tmp_reg.idx = reg + 2 - current_state->current_row;
1442     shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color);
1443
1444     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
1445     shader_addline(buffer, "DP3 %s%u.%c, fragment.texcoord[%u], %s;\n",
1446                    dst_name, tmp_reg.idx, 'x' + current_state->current_row, reg, src0_name);
1447     current_state->texcoord_w[current_state->current_row++] = reg;
1448 }
1449
1450 static void pshader_hw_texm3x3tex(const struct wined3d_shader_instruction *ins)
1451 {
1452     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
1453     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
1454     DWORD flags;
1455     DWORD reg = ins->dst[0].reg.idx;
1456     SHADER_BUFFER *buffer = ins->ctx->buffer;
1457     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
1458     char dst_str[50];
1459     char src0_name[50], dst_name[50];
1460     BOOL is_color;
1461
1462     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
1463     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
1464     shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name);
1465
1466     /* Sample the texture using the calculated coordinates */
1467     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
1468     flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
1469     shader_hw_sample(ins, reg, dst_str, dst_name, flags & WINED3DTTFF_PROJECTED, FALSE);
1470     current_state->current_row = 0;
1471 }
1472
1473 static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins)
1474 {
1475     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
1476     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
1477     DWORD flags;
1478     DWORD reg = ins->dst[0].reg.idx;
1479     SHADER_BUFFER *buffer = ins->ctx->buffer;
1480     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
1481     char dst_str[50];
1482     char src0_name[50];
1483     char dst_reg[8];
1484     BOOL is_color;
1485
1486     /* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all
1487      * components for temporary data storage
1488      */
1489     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
1490     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
1491     shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
1492
1493     /* Construct the eye-ray vector from w coordinates */
1494     shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", current_state->texcoord_w[0]);
1495     shader_addline(buffer, "MOV TB.y, fragment.texcoord[%u].w;\n", current_state->texcoord_w[1]);
1496     shader_addline(buffer, "MOV TB.z, fragment.texcoord[%u].w;\n", reg);
1497
1498     /* Calculate reflection vector
1499      */
1500     shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg);
1501     /* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */
1502     shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg);
1503     shader_addline(buffer, "RCP TB.w, TB.w;\n");
1504     shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg);
1505     shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
1506     shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg);
1507
1508     /* Sample the texture using the calculated coordinates */
1509     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
1510     flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
1511     shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED, FALSE);
1512     current_state->current_row = 0;
1513 }
1514
1515 static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins)
1516 {
1517     IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)ins->ctx->shader;
1518     IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
1519     DWORD flags;
1520     DWORD reg = ins->dst[0].reg.idx;
1521     SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
1522     SHADER_BUFFER *buffer = ins->ctx->buffer;
1523     char dst_str[50];
1524     char src0_name[50];
1525     char src1_name[50];
1526     char dst_reg[8];
1527     BOOL is_color;
1528
1529     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
1530     shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name);
1531     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
1532     /* Note: dst_reg.xy is input here, generated by two texm3x3pad instructions */
1533     shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
1534
1535     /* Calculate reflection vector.
1536      *
1537      *                   dot(N, E)
1538      * dst_reg.xyz = 2 * --------- * N - E
1539      *                   dot(N, N)
1540      *
1541      * Which normalizes the normal vector
1542      */
1543     shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name);
1544     shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg);
1545     shader_addline(buffer, "RCP TC.w, TC.w;\n");
1546     shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg);
1547     shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
1548     shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name);
1549
1550     /* Sample the texture using the calculated coordinates */
1551     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
1552     flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
1553     shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED, FALSE);
1554     current_state->current_row = 0;
1555 }
1556
1557 static void pshader_hw_texdepth(const struct wined3d_shader_instruction *ins)
1558 {
1559     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1560     SHADER_BUFFER *buffer = ins->ctx->buffer;
1561     char dst_name[50];
1562
1563     /* texdepth has an implicit destination, the fragment depth value. It's only parameter,
1564      * which is essentially an input, is the destination register because it is the first
1565      * parameter. According to the msdn, this must be register r5, but let's keep it more flexible
1566      * here(writemasks/swizzles are not valid on texdepth)
1567      */
1568     shader_arb_get_dst_param(ins, dst, dst_name);
1569
1570     /* According to the msdn, the source register(must be r5) is unusable after
1571      * the texdepth instruction, so we're free to modify it
1572      */
1573     shader_addline(buffer, "MIN %s.y, %s.y, one.y;\n", dst_name, dst_name);
1574
1575     /* How to deal with the special case dst_name.g == 0? if r != 0, then
1576      * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
1577      * result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
1578      */
1579     shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name);
1580     shader_addline(buffer, "MUL TA.x, %s.x, %s.y;\n", dst_name, dst_name);
1581     shader_addline(buffer, "MIN TA.x, TA.x, one.x;\n");
1582     shader_addline(buffer, "MAX result.depth, TA.x, 0.0;\n");
1583 }
1584
1585 /** Process the WINED3DSIO_TEXDP3TEX instruction in ARB:
1586  * Take a 3-component dot product of the TexCoord[dstreg] and src,
1587  * then perform a 1D texture lookup from stage dstregnum, place into dst. */
1588 static void pshader_hw_texdp3tex(const struct wined3d_shader_instruction *ins)
1589 {
1590     SHADER_BUFFER *buffer = ins->ctx->buffer;
1591     DWORD sampler_idx = ins->dst[0].reg.idx;
1592     char src0[50];
1593     char dst_str[50];
1594
1595     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
1596     shader_addline(buffer, "MOV TB, 0.0;\n");
1597     shader_addline(buffer, "DP3 TB.x, fragment.texcoord[%u], %s;\n", sampler_idx, src0);
1598
1599     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
1600     shader_hw_sample(ins, sampler_idx, dst_str, "TB", FALSE /* Only one coord, can't be projected */, FALSE);
1601 }
1602
1603 /** Process the WINED3DSIO_TEXDP3 instruction in ARB:
1604  * Take a 3-component dot product of the TexCoord[dstreg] and src. */
1605 static void pshader_hw_texdp3(const struct wined3d_shader_instruction *ins)
1606 {
1607     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1608     char src0[50];
1609     char dst_str[50];
1610     SHADER_BUFFER *buffer = ins->ctx->buffer;
1611
1612     /* Handle output register */
1613     shader_arb_get_dst_param(ins, dst, dst_str);
1614     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
1615     shader_addline(buffer, "DP3 %s, fragment.texcoord[%u], %s;\n", dst_str, dst->reg.idx, src0);
1616 }
1617
1618 /** Process the WINED3DSIO_TEXM3X3 instruction in ARB
1619  * Perform the 3rd row of a 3x3 matrix multiply */
1620 static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins)
1621 {
1622     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1623     SHADER_BUFFER *buffer = ins->ctx->buffer;
1624     char dst_str[50], dst_name[50];
1625     char src0[50];
1626     BOOL is_color;
1627
1628     shader_arb_get_dst_param(ins, dst, dst_str);
1629     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
1630     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
1631     shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx, src0);
1632     shader_addline(buffer, "MOV %s, %s;\n", dst_str, dst_name);
1633 }
1634
1635 /** Process the WINED3DSIO_TEXM3X2DEPTH instruction in ARB:
1636  * Last row of a 3x2 matrix multiply, use the result to calculate the depth:
1637  * Calculate tmp0.y = TexCoord[dstreg] . src.xyz;  (tmp0.x has already been calculated)
1638  * depth = (tmp0.y == 0.0) ? 1.0 : tmp0.x / tmp0.y
1639  */
1640 static void pshader_hw_texm3x2depth(const struct wined3d_shader_instruction *ins)
1641 {
1642     SHADER_BUFFER *buffer = ins->ctx->buffer;
1643     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1644     char src0[50], dst_name[50];
1645     BOOL is_color;
1646
1647     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
1648     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
1649     shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx, src0);
1650
1651     /* How to deal with the special case dst_name.g == 0? if r != 0, then
1652      * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
1653      * result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
1654      */
1655     shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name);
1656     shader_addline(buffer, "MUL %s.x, %s.x, %s.y;\n", dst_name, dst_name, dst_name);
1657     shader_addline(buffer, "MIN %s.x, %s.x, one.x;\n", dst_name, dst_name);
1658     shader_addline(buffer, "MAX result.depth, %s.x, 0.0;\n", dst_name);
1659 }
1660
1661 /** Handles transforming all WINED3DSIO_M?x? opcodes for
1662     Vertex/Pixel shaders to ARB_vertex_program codes */
1663 static void shader_hw_mnxn(const struct wined3d_shader_instruction *ins)
1664 {
1665     int i;
1666     int nComponents = 0;
1667     struct wined3d_shader_dst_param tmp_dst = {{0}};
1668     struct wined3d_shader_src_param tmp_src[2] = {{{0}}};
1669     struct wined3d_shader_instruction tmp_ins;
1670
1671     memset(&tmp_ins, 0, sizeof(tmp_ins));
1672
1673     /* Set constants for the temporary argument */
1674     tmp_ins.ctx = ins->ctx;
1675     tmp_ins.dst_count = 1;
1676     tmp_ins.dst = &tmp_dst;
1677     tmp_ins.src_count = 2;
1678     tmp_ins.src = tmp_src;
1679
1680     switch(ins->handler_idx)
1681     {
1682         case WINED3DSIH_M4x4:
1683             nComponents = 4;
1684             tmp_ins.handler_idx = WINED3DSIH_DP4;
1685             break;
1686         case WINED3DSIH_M4x3:
1687             nComponents = 3;
1688             tmp_ins.handler_idx = WINED3DSIH_DP4;
1689             break;
1690         case WINED3DSIH_M3x4:
1691             nComponents = 4;
1692             tmp_ins.handler_idx = WINED3DSIH_DP3;
1693             break;
1694         case WINED3DSIH_M3x3:
1695             nComponents = 3;
1696             tmp_ins.handler_idx = WINED3DSIH_DP3;
1697             break;
1698         case WINED3DSIH_M3x2:
1699             nComponents = 2;
1700             tmp_ins.handler_idx = WINED3DSIH_DP3;
1701             break;
1702         default:
1703             FIXME("Unhandled opcode %#x\n", ins->handler_idx);
1704             break;
1705     }
1706
1707     tmp_dst = ins->dst[0];
1708     tmp_src[0] = ins->src[0];
1709     tmp_src[1] = ins->src[1];
1710     for (i = 0; i < nComponents; i++) {
1711         tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i;
1712         shader_hw_map2gl(&tmp_ins);
1713         ++tmp_src[1].reg.idx;
1714     }
1715 }
1716
1717 static void shader_hw_rsq_rcp(const struct wined3d_shader_instruction *ins)
1718 {
1719     SHADER_BUFFER *buffer = ins->ctx->buffer;
1720     const char *instruction;
1721
1722     char dst[50];
1723     char src[50];
1724
1725     switch(ins->handler_idx)
1726     {
1727         case WINED3DSIH_RSQ: instruction = "RSQ"; break;
1728         case WINED3DSIH_RCP: instruction = "RCP"; break;
1729         default: instruction = "";
1730             FIXME("Unhandled opcode %#x\n", ins->handler_idx);
1731             break;
1732     }
1733
1734     shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */
1735     shader_arb_get_src_param(ins, &ins->src[0], 0, src);
1736     if (ins->src[0].swizzle == WINED3DSP_NOSWIZZLE)
1737     {
1738         /* Dx sdk says .x is used if no swizzle is given, but our test shows that
1739          * .w is used
1740          */
1741         strcat(src, ".w");
1742     }
1743
1744     shader_addline(buffer, "%s%s %s, %s;\n", instruction, shader_arb_get_modifier(ins), dst, src);
1745 }
1746
1747 static void shader_hw_nrm(const struct wined3d_shader_instruction *ins)
1748 {
1749     SHADER_BUFFER *buffer = ins->ctx->buffer;
1750     char dst_name[50];
1751     char src_name[50];
1752
1753     shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
1754     shader_arb_get_src_param(ins, &ins->src[0], 1 /* Use TB */, src_name);
1755     shader_addline(buffer, "DP3 TA, %s, %s;\n", src_name, src_name);
1756     shader_addline(buffer, "RSQ TA, TA.x;\n");
1757     /* dst.w = src[0].w * 1 / (src.x^2 + src.y^2 + src.z^2)^(1/2) according to msdn*/
1758     shader_addline(buffer, "MUL%s %s, %s, TA;\n", shader_arb_get_modifier(ins), dst_name,
1759                    src_name);
1760 }
1761
1762 static void shader_hw_sincos(const struct wined3d_shader_instruction *ins)
1763 {
1764     /* This instruction exists in ARB, but the d3d instruction takes two extra parameters which
1765      * must contain fixed constants. So we need a separate function to filter those constants and
1766      * can't use map2gl
1767      */
1768     SHADER_BUFFER *buffer = ins->ctx->buffer;
1769     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
1770     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1771     char dst_name[50];
1772     char src_name0[50], src_name1[50], src_name2[50];
1773     BOOL is_color;
1774
1775     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
1776     if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) {
1777         shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
1778         shader_addline(buffer, "SCS%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name,
1779                        src_name0);
1780     } else if(priv->target_version >= NV2) {
1781         shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);
1782
1783         /* Sincos writemask must be .x, .y or .xy */
1784         if(dst->write_mask & WINED3DSP_WRITEMASK_0)
1785             shader_addline(buffer, "COS%s %s.x, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
1786         if(dst->write_mask & WINED3DSP_WRITEMASK_1)
1787             shader_addline(buffer, "SIN%s %s.y, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
1788     } else {
1789         /* Approximate sine and cosine with a taylor series, as per math textbook. The application passes 8
1790          * helper constants(D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2.
1791          *
1792          * sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ...
1793          * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ...
1794          *
1795          * The constants we get are:
1796          *
1797          *  +1   +1,     -1     -1     +1      +1      -1       -1
1798          *      ---- ,  ---- , ---- , ----- , ----- , ----- , ------
1799          *      1!*2    2!*4   3!*8   4!*16   5!*32   6!*64   7!*128
1800          *
1801          * If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2):
1802          *
1803          * (x/2)^2 = x^2 / 4
1804          * (x/2)^3 = x^3 / 8
1805          * (x/2)^4 = x^4 / 16
1806          * (x/2)^5 = x^5 / 32
1807          * etc
1808          *
1809          * To get the final result:
1810          * sin(x) = 2 * sin(x/2) * cos(x/2)
1811          * cos(x) = cos(x/2)^2 - sin(x/2)^2
1812          * (from sin(x+y) and cos(x+y) rules)
1813          *
1814          * As per MSDN, dst.z is undefined after the operation, and so is
1815          * dst.x and dst.y if they're masked out by the writemask. Ie
1816          * sincos dst.y, src1, c0, c1
1817          * returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler
1818          * vsa.exe also stops with an error if the dest register is the same register as the source
1819          * register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also
1820          * indicates that sincos consumes 8 instruction slots in vs_2_0(and, strangely, in vs_3_0).
1821          */
1822         shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
1823         shader_arb_get_src_param(ins, &ins->src[2], 2, src_name2);
1824         shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);
1825
1826         shader_addline(buffer, "MUL %s.x, %s, %s;\n", dst_name, src_name0, src_name0);  /* x ^ 2 */
1827         shader_addline(buffer, "MUL TA.y, %s.x, %s;\n", dst_name, src_name0);           /* x ^ 3 */
1828         shader_addline(buffer, "MUL %s.y, TA.y, %s;\n", dst_name, src_name0);           /* x ^ 4 */
1829         shader_addline(buffer, "MUL TA.z, %s.y, %s;\n", dst_name, src_name0);           /* x ^ 5 */
1830         shader_addline(buffer, "MUL %s.z, TA.z, %s;\n", dst_name, src_name0);           /* x ^ 6 */
1831         shader_addline(buffer, "MUL TA.w, %s.z, %s;\n", dst_name, src_name0);           /* x ^ 7 */
1832
1833         /* sin(x/2)
1834          *
1835          * Unfortunately we don't get the constants in a DP4-capable form. Is there a way to
1836          * properly merge that with MULs in the code above?
1837          * The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe
1838          * we can merge the sine and cosine MAD rows to calculate them together.
1839          */
1840         shader_addline(buffer, "MUL TA.x, %s, %s.w;\n", src_name0, src_name2); /* x^1, +1/(1!*2) */
1841         shader_addline(buffer, "MAD TA.x, TA.y, %s.x, TA.x;\n", src_name2); /* -1/(3!*8) */
1842         shader_addline(buffer, "MAD TA.x, TA.z, %s.w, TA.x;\n", src_name1); /* +1/(5!*32) */
1843         shader_addline(buffer, "MAD TA.x, TA.w, %s.x, TA.x;\n", src_name1); /* -1/(7!*128) */
1844
1845         /* cos(x/2) */
1846         shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2); /* -1/(2!*4), +1.0 */
1847         shader_addline(buffer, "MAD TA.y, %s.y, %s.z, TA.y;\n", dst_name, src_name1); /* +1/(4!*16) */
1848         shader_addline(buffer, "MAD TA.y, %s.z, %s.y, TA.y;\n", dst_name, src_name1); /* -1/(6!*64) */
1849
1850         if(dst->write_mask & WINED3DSP_WRITEMASK_0) {
1851             /* cos x */
1852             shader_addline(buffer, "MUL TA.z, TA.y, TA.y;\n");
1853             shader_addline(buffer, "MAD %s.x, -TA.x, TA.x, TA.z;\n", dst_name);
1854         }
1855         if(dst->write_mask & WINED3DSP_WRITEMASK_1) {
1856             /* sin x */
1857             shader_addline(buffer, "MUL %s.y, TA.x, TA.y;\n", dst_name);
1858             shader_addline(buffer, "ADD %s.y, %s.y, %s.y;\n", dst_name, dst_name, dst_name);
1859         }
1860     }
1861 }
1862
1863 /* GL locking is done by the caller */
1864 static void shader_hw_sgn(const struct wined3d_shader_instruction *ins)
1865 {
1866     SHADER_BUFFER *buffer = ins->ctx->buffer;
1867     char dst_name[50];
1868     char src_name[50];
1869     struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1870
1871     /* SGN is only valid in vertex shaders */
1872     if(ctx->target_version == NV2) {
1873         shader_hw_map2gl(ins);
1874         return;
1875     }
1876     shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
1877     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name);
1878
1879     FIXME("Emulated SGN untested\n");
1880     /* If SRC > 0.0, -SRC < SRC = TRUE, otherwise false.
1881      * if SRC < 0.0,  SRC < -SRC = TRUE. If neither is true, src = 0.0
1882      */
1883     if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) {
1884         shader_addline(buffer, "SLT %s, -%s, %s;\n", dst_name, src_name, src_name);
1885     } else {
1886         shader_addline(buffer, "SLT TB, -%s, %s;\n", src_name, src_name);
1887         shader_addline(buffer, "SLT TC,  %s, -%s;\n", src_name, src_name);
1888         shader_addline(buffer, "ADD %s, TB, -TC;\n", dst_name);
1889     }
1890 }
1891
1892 static GLuint create_arb_blt_vertex_program(const WineD3D_GL_Info *gl_info)
1893 {
1894     GLuint program_id = 0;
1895     const char *blt_vprogram =
1896         "!!ARBvp1.0\n"
1897         "PARAM c[1] = { { 1, 0.5 } };\n"
1898         "MOV result.position, vertex.position;\n"
1899         "MOV result.color, c[0].x;\n"
1900         "MOV result.texcoord[0], vertex.texcoord[0];\n"
1901         "END\n";
1902
1903     GL_EXTCALL(glGenProgramsARB(1, &program_id));
1904     GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, program_id));
1905     GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(blt_vprogram), blt_vprogram));
1906
1907     if (glGetError() == GL_INVALID_OPERATION) {
1908         GLint pos;
1909         glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
1910         FIXME("Vertex program error at position %d: %s\n", pos,
1911             debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1912     }
1913
1914     return program_id;
1915 }
1916
1917 /* GL locking is done by the caller */
1918 static GLuint create_arb_blt_fragment_program(const WineD3D_GL_Info *gl_info, enum tex_types tex_type)
1919 {
1920     GLuint program_id = 0;
1921     static const char * const blt_fprograms[tex_type_count] =
1922     {
1923         /* tex_1d */
1924         NULL,
1925         /* tex_2d */
1926         "!!ARBfp1.0\n"
1927         "TEMP R0;\n"
1928         "TEX R0.x, fragment.texcoord[0], texture[0], 2D;\n"
1929         "MOV result.depth.z, R0.x;\n"
1930         "END\n",
1931         /* tex_3d */
1932         NULL,
1933         /* tex_cube */
1934         "!!ARBfp1.0\n"
1935         "TEMP R0;\n"
1936         "TEX R0.x, fragment.texcoord[0], texture[0], CUBE;\n"
1937         "MOV result.depth.z, R0.x;\n"
1938         "END\n",
1939         /* tex_rect */
1940         "!!ARBfp1.0\n"
1941         "TEMP R0;\n"
1942         "TEX R0.x, fragment.texcoord[0], texture[0], RECT;\n"
1943         "MOV result.depth.z, R0.x;\n"
1944         "END\n",
1945     };
1946
1947     if (!blt_fprograms[tex_type])
1948     {
1949         FIXME("tex_type %#x not supported\n", tex_type);
1950         tex_type = tex_2d;
1951     }
1952
1953     GL_EXTCALL(glGenProgramsARB(1, &program_id));
1954     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, program_id));
1955     GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(blt_fprograms[tex_type]), blt_fprograms[tex_type]));
1956
1957     if (glGetError() == GL_INVALID_OPERATION) {
1958         GLint pos;
1959         glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
1960         FIXME("Fragment program error at position %d: %s\n", pos,
1961             debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1962     }
1963
1964     return program_id;
1965 }
1966
1967 static void arbfp_add_sRGB_correction(SHADER_BUFFER *buffer, const char *fragcolor, const char *tmp1,
1968                                       const char *tmp2, const char *tmp3) {
1969     /* Perform sRGB write correction. See GLX_EXT_framebuffer_sRGB */
1970
1971     /* Calculate the > 0.0031308 case */
1972     shader_addline(buffer, "POW %s.x, %s.x, srgb_consts1.z;\n", tmp1, fragcolor);
1973     shader_addline(buffer, "POW %s.y, %s.y, srgb_consts1.z;\n", tmp1, fragcolor);
1974     shader_addline(buffer, "POW %s.z, %s.z, srgb_consts1.z;\n", tmp1, fragcolor);
1975     shader_addline(buffer, "MUL %s, %s, srgb_consts1.w;\n", tmp1, tmp1);
1976     shader_addline(buffer, "SUB %s, %s, srgb_consts2.x;\n", tmp1, tmp1);
1977     /* Calculate the < case */
1978     shader_addline(buffer, "MUL %s, srgb_consts1.x, %s;\n", tmp2, fragcolor);
1979     /* Subtract the comparison value from the fragcolor and use CMP to pick either the > case * or the < case */
1980     shader_addline(buffer, "SUB %s, %s, srgb_consts1.y;\n", tmp3, fragcolor);
1981     shader_addline(buffer, "CMP result.color.xyz, %s, %s, %s;\n", tmp3, tmp2, tmp1);
1982 }
1983
1984 /* GL locking is done by the caller */
1985 static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
1986         SHADER_BUFFER *buffer, const struct arb_ps_compile_args *args)
1987 {
1988     const shader_reg_maps* reg_maps = &This->baseShader.reg_maps;
1989     CONST DWORD *function = This->baseShader.function;
1990     const WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info;
1991     const local_constant *lconst;
1992     GLuint retval;
1993     const char *fragcolor;
1994     DWORD *lconst_map = local_const_mapping((IWineD3DBaseShaderImpl *) This);
1995     struct shader_arb_ctx_priv priv_ctx;
1996
1997     /*  Create the hw ARB shader */
1998     memset(&priv_ctx, 0, sizeof(priv_ctx));
1999     priv_ctx.cur_ps_args = args;
2000     list_init(&priv_ctx.if_frames);
2001
2002     shader_addline(buffer, "!!ARBfp1.0\n");
2003     if(GL_SUPPORT(NV_FRAGMENT_PROGRAM_OPTION)) {
2004         shader_addline(buffer, "OPTION NV_fragment_program;\n");
2005         priv_ctx.target_version = NV2;
2006     } else {
2007         priv_ctx.target_version = ARB;
2008     }
2009
2010     if (reg_maps->shader_version.major < 3)
2011     {
2012         switch(args->super.fog) {
2013             case FOG_OFF:
2014                 break;
2015             case FOG_LINEAR:
2016                 shader_addline(buffer, "OPTION ARB_fog_linear;\n");
2017                 break;
2018             case FOG_EXP:
2019                 shader_addline(buffer, "OPTION ARB_fog_exp;\n");
2020                 break;
2021             case FOG_EXP2:
2022                 shader_addline(buffer, "OPTION ARB_fog_exp2;\n");
2023                 break;
2024         }
2025     }
2026
2027     shader_addline(buffer, "TEMP TA;\n");      /* Used for modifiers */
2028     shader_addline(buffer, "TEMP TB;\n");      /* Used for modifiers */
2029     shader_addline(buffer, "TEMP TC;\n");      /* Used for modifiers */
2030     shader_addline(buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
2031     shader_addline(buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n");
2032     shader_addline(buffer, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
2033
2034     if (reg_maps->shader_version.major < 2)
2035     {
2036         fragcolor = "R0";
2037     } else {
2038         if(args->super.srgb_correction) {
2039             shader_addline(buffer, "TEMP TMP_COLOR;\n");
2040             fragcolor = "TMP_COLOR";
2041         } else {
2042             fragcolor = "result.color";
2043         }
2044     }
2045
2046     if(args->super.srgb_correction) {
2047         shader_addline(buffer, "PARAM srgb_consts1 = {%f, %f, %f, %f};\n",
2048                        srgb_mul_low, srgb_cmp, srgb_pow, srgb_mul_high);
2049         shader_addline(buffer, "PARAM srgb_consts2 = {%f, %f, %f, %f};\n",
2050                        srgb_sub_high, 0.0, 0.0, 0.0);
2051     }
2052
2053     /* Base Declarations */
2054     shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, lconst_map);
2055
2056     /* Base Shader Body */
2057     shader_generate_main((IWineD3DBaseShader *)This, buffer, reg_maps, function, &priv_ctx);
2058
2059     if(args->super.srgb_correction) {
2060         arbfp_add_sRGB_correction(buffer, fragcolor, "TA", "TB", "TC");
2061         shader_addline(buffer, "MOV result.color.a, %s;\n", fragcolor);
2062     } else if(reg_maps->shader_version.major < 2) {
2063         shader_addline(buffer, "MOV result.color, %s;\n", fragcolor);
2064     }
2065     shader_addline(buffer, "END\n");
2066
2067     /* TODO: change to resource.glObjectHandle or something like that */
2068     GL_EXTCALL(glGenProgramsARB(1, &retval));
2069
2070     TRACE("Creating a hw pixel shader, prg=%d\n", retval);
2071     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval));
2072
2073     TRACE("Created hw pixel shader, prg=%d\n", retval);
2074     /* Create the program and check for errors */
2075     GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
2076                buffer->bsize, buffer->buffer));
2077
2078     if (glGetError() == GL_INVALID_OPERATION) {
2079         GLint errPos;
2080         glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
2081         FIXME("HW PixelShader Error at position %d: %s\n",
2082               errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
2083         retval = 0;
2084     }
2085
2086     /* Load immediate constants */
2087     if(lconst_map) {
2088         LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
2089             const float *value = (const float *)lconst->value;
2090             GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, lconst_map[lconst->idx], value));
2091             checkGLcall("glProgramLocalParameter4fvARB");
2092         }
2093         HeapFree(GetProcessHeap(), 0, lconst_map);
2094     }
2095
2096     return retval;
2097 }
2098
2099 /* GL locking is done by the caller */
2100 static GLuint shader_arb_generate_vshader(IWineD3DVertexShaderImpl *This,
2101         SHADER_BUFFER *buffer, const struct arb_vs_compile_args *args)
2102 {
2103     const shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
2104     CONST DWORD *function = This->baseShader.function;
2105     IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)This->baseShader.device;
2106     const WineD3D_GL_Info *gl_info = &device->adapter->gl_info;
2107     const local_constant *lconst;
2108     GLuint ret;
2109     DWORD *lconst_map = local_const_mapping((IWineD3DBaseShaderImpl *) This);
2110     struct shader_arb_ctx_priv priv_ctx;
2111
2112     memset(&priv_ctx, 0, sizeof(priv_ctx));
2113     priv_ctx.cur_vs_args = args;
2114     list_init(&priv_ctx.if_frames);
2115
2116     /*  Create the hw ARB shader */
2117     shader_addline(buffer, "!!ARBvp1.0\n");
2118
2119     if(GL_SUPPORT(NV_VERTEX_PROGRAM2_OPTION)) {
2120         shader_addline(buffer, "OPTION NV_vertex_program2;\n");
2121         priv_ctx.target_version = NV2;
2122     } else {
2123         priv_ctx.target_version = ARB;
2124     }
2125
2126     shader_addline(buffer, "TEMP TMP_OUT;\n");
2127     if(need_helper_const(gl_info)) {
2128         shader_addline(buffer, "PARAM helper_const = { 2.0, -1.0, %d.0, 0.0 };\n", This->rel_offset);
2129     }
2130     if(need_mova_const((IWineD3DBaseShader *) This, gl_info)) {
2131         shader_addline(buffer, "PARAM mova_const = { 0.5, 0.0, 2.0, 1.0 };\n");
2132         shader_addline(buffer, "TEMP A0_SHADOW;\n");
2133     }
2134
2135     /* Mesa supports only 95 constants */
2136     if (GL_VEND(MESA) || GL_VEND(WINE))
2137         This->baseShader.limits.constant_float =
2138                 min(95, This->baseShader.limits.constant_float);
2139
2140     shader_addline(buffer, "TEMP TA;\n");
2141
2142     /* Base Declarations */
2143     shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, lconst_map);
2144
2145     /* We need a constant to fixup the final position */
2146     shader_addline(buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
2147
2148     /* Initialize output parameters. GL_ARB_vertex_program does not require special initialization values
2149      * for output parameters. D3D in theory does not do that either, but some applications depend on a
2150      * proper initialization of the secondary color, and programs using the fixed function pipeline without
2151      * a replacement shader depend on the texcoord.w being set properly.
2152      *
2153      * GL_NV_vertex_program defines that all output values are initialized to {0.0, 0.0, 0.0, 1.0}. This
2154      * assertion is in effect even when using GL_ARB_vertex_program without any NV specific additions. So
2155      * skip this if NV_vertex_program is supported. Otherwise, initialize the secondary color. For the tex-
2156      * coords, we have a flag in the opengl caps. Many cards do not require the texcoord being set, and
2157      * this can eat a number of instructions, so skip it unless this cap is set as well
2158      */
2159     if(!GL_SUPPORT(NV_VERTEX_PROGRAM)) {
2160         shader_addline(buffer, "MOV result.color.secondary, -helper_const.wwwy;\n");
2161
2162         if((GLINFO_LOCATION).set_texcoord_w && !device->frag_pipe->ffp_proj_control) {
2163             int i;
2164             for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) {
2165                 if(This->baseShader.reg_maps.texcoord_mask[i] != 0 &&
2166                 This->baseShader.reg_maps.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) {
2167                     shader_addline(buffer, "MOV result.texcoord[%u].w, -helper_const.y;\n", i);
2168                 }
2169             }
2170         }
2171     }
2172
2173     /* Base Shader Body */
2174     shader_generate_main((IWineD3DBaseShader *)This, buffer, reg_maps, function, &priv_ctx);
2175
2176     /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
2177      * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
2178      * the fog frag coord is thrown away. If the fog frag coord is used, but not written by
2179      * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
2180      */
2181     if(args->super.fog_src == VS_FOG_Z) {
2182         shader_addline(buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
2183     } else if (!reg_maps->fog) {
2184         /* posFixup.x is always 1.0, so we can savely use it */
2185         shader_addline(buffer, "ADD result.fogcoord, posFixup.x, -posFixup.x;\n");
2186     }
2187
2188     /* Write the final position.
2189      *
2190      * OpenGL coordinates specify the center of the pixel while d3d coords specify
2191      * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
2192      * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
2193      * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
2194      */
2195     shader_addline(buffer, "MUL TA, posFixup, TMP_OUT.w;\n");
2196     shader_addline(buffer, "ADD TMP_OUT.x, TMP_OUT.x, TA.z;\n");
2197     shader_addline(buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TA.w;\n");
2198
2199     /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
2200      * and the glsl equivalent
2201      */
2202     if(need_helper_const(gl_info)) {
2203         shader_addline(buffer, "MAD TMP_OUT.z, TMP_OUT.z, helper_const.x, -TMP_OUT.w;\n");
2204     } else {
2205         shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, TMP_OUT.z;\n");
2206         shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, -TMP_OUT.w;\n");
2207     }
2208
2209     shader_addline(buffer, "MOV result.position, TMP_OUT;\n");
2210
2211     shader_addline(buffer, "END\n");
2212
2213     /* TODO: change to resource.glObjectHandle or something like that */
2214     GL_EXTCALL(glGenProgramsARB(1, &ret));
2215
2216     TRACE("Creating a hw vertex shader, prg=%d\n", ret);
2217     GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, ret));
2218
2219     TRACE("Created hw vertex shader, prg=%d\n", ret);
2220     /* Create the program and check for errors */
2221     GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
2222                buffer->bsize, buffer->buffer));
2223
2224     if (glGetError() == GL_INVALID_OPERATION) {
2225         GLint errPos;
2226         glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
2227         FIXME("HW VertexShader Error at position %d: %s\n",
2228               errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
2229         ret = -1;
2230     } else {
2231         /* Load immediate constants */
2232         if(lconst_map) {
2233             LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
2234                 const float *value = (const float *)lconst->value;
2235                 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, lconst_map[lconst->idx], value));
2236             }
2237         }
2238     }
2239     HeapFree(GetProcessHeap(), 0, lconst_map);
2240
2241     return ret;
2242 }
2243
2244 /* GL locking is done by the caller */
2245 static GLuint find_arb_pshader(IWineD3DPixelShaderImpl *shader, const struct arb_ps_compile_args *args)
2246 {
2247     UINT i;
2248     DWORD new_size;
2249     struct arb_ps_compiled_shader *new_array;
2250     SHADER_BUFFER buffer;
2251     struct arb_pshader_private *shader_data;
2252     GLuint ret;
2253
2254     if(!shader->backend_priv) {
2255         shader->backend_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
2256     }
2257     shader_data = shader->backend_priv;
2258
2259     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
2260      * so a linear search is more performant than a hashmap or a binary search
2261      * (cache coherency etc)
2262      */
2263     for(i = 0; i < shader_data->num_gl_shaders; i++) {
2264         if(memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args)) == 0) {
2265             return shader_data->gl_shaders[i].prgId;
2266         }
2267     }
2268
2269     TRACE("No matching GL shader found, compiling a new shader\n");
2270     if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
2271         if (shader_data->num_gl_shaders)
2272         {
2273             new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
2274             new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders,
2275                                     new_size * sizeof(*shader_data->gl_shaders));
2276         } else {
2277             new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader_data->gl_shaders));
2278             new_size = 1;
2279         }
2280
2281         if(!new_array) {
2282             ERR("Out of memory\n");
2283             return 0;
2284         }
2285         shader_data->gl_shaders = new_array;
2286         shader_data->shader_array_size = new_size;
2287     }
2288
2289     shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
2290
2291     pixelshader_update_samplers(&shader->baseShader.reg_maps,
2292             ((IWineD3DDeviceImpl *)shader->baseShader.device)->stateBlock->textures);
2293
2294     shader_buffer_init(&buffer);
2295     ret = shader_arb_generate_pshader(shader, &buffer, args);
2296     shader_buffer_free(&buffer);
2297     shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
2298
2299     return ret;
2300 }
2301
2302 static inline BOOL vs_args_equal(const struct arb_vs_compile_args *stored, const struct arb_vs_compile_args *new,
2303                                  const DWORD use_map) {
2304     if((stored->super.swizzle_map & use_map) != new->super.swizzle_map) return FALSE;
2305     if(stored->super.fog_src != new->super.fog_src) return FALSE;
2306     return stored->bools == new->bools;
2307 }
2308
2309 static GLuint find_arb_vshader(IWineD3DVertexShaderImpl *shader, const struct arb_vs_compile_args *args)
2310 {
2311     UINT i;
2312     DWORD new_size;
2313     struct arb_vs_compiled_shader *new_array;
2314     DWORD use_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.use_map;
2315     SHADER_BUFFER buffer;
2316     struct arb_vshader_private *shader_data;
2317     GLuint ret;
2318
2319     if(!shader->backend_priv) {
2320         shader->backend_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
2321     }
2322     shader_data = shader->backend_priv;
2323
2324     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
2325      * so a linear search is more performant than a hashmap or a binary search
2326      * (cache coherency etc)
2327      */
2328     for(i = 0; i < shader_data->num_gl_shaders; i++) {
2329         if(vs_args_equal(&shader_data->gl_shaders[i].args, args, use_map)) {
2330             return shader_data->gl_shaders[i].prgId;
2331         }
2332     }
2333
2334     TRACE("No matching GL shader found, compiling a new shader\n");
2335
2336     if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
2337         if (shader_data->num_gl_shaders)
2338         {
2339             new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
2340             new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders,
2341                                     new_size * sizeof(*shader_data->gl_shaders));
2342         } else {
2343             new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader_data->gl_shaders));
2344             new_size = 1;
2345         }
2346
2347         if(!new_array) {
2348             ERR("Out of memory\n");
2349             return 0;
2350         }
2351         shader_data->gl_shaders = new_array;
2352         shader_data->shader_array_size = new_size;
2353     }
2354
2355     shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
2356
2357     shader_buffer_init(&buffer);
2358     ret = shader_arb_generate_vshader(shader, &buffer, args);
2359     shader_buffer_free(&buffer);
2360     shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
2361
2362     return ret;
2363 }
2364
2365 static inline void find_arb_ps_compile_args(IWineD3DPixelShaderImpl *shader, IWineD3DStateBlockImpl *stateblock,
2366         struct arb_ps_compile_args *args)
2367 {
2368     int i;
2369     find_ps_compile_args(shader, stateblock, &args->super);
2370
2371     /* This forces all local boolean constants to 1 to make them stateblock independent */
2372     args->bools = shader->baseShader.reg_maps.local_bool_consts;
2373
2374     for(i = 0; i < MAX_CONST_B; i++)
2375     {
2376         if(stateblock->pixelShaderConstantB[i]) args->bools |= ( 1 << i);
2377     }
2378
2379 }
2380
2381 static inline void find_arb_vs_compile_args(IWineD3DVertexShaderImpl *shader, IWineD3DStateBlockImpl *stateblock,
2382         struct arb_vs_compile_args *args)
2383 {
2384     int i;
2385     find_vs_compile_args(shader, stateblock, &args->super);
2386
2387     /* This forces all local boolean constants to 1 to make them stateblock independent */
2388     args->bools = shader->baseShader.reg_maps.local_bool_consts;
2389
2390     /* TODO: Figure out if it would be better to store bool constants as bitmasks in the stateblock */
2391     for(i = 0; i < MAX_CONST_B; i++)
2392     {
2393         if(stateblock->vertexShaderConstantB[i]) args->bools |= ( 1 << i);
2394     }
2395
2396 }
2397
2398 /* GL locking is done by the caller */
2399 static void shader_arb_select(IWineD3DDevice *iface, BOOL usePS, BOOL useVS) {
2400     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
2401     struct shader_arb_priv *priv = This->shader_priv;
2402     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
2403
2404     if (useVS) {
2405         struct arb_vs_compile_args compile_args;
2406
2407         TRACE("Using vertex shader\n");
2408         find_arb_vs_compile_args((IWineD3DVertexShaderImpl *) This->stateBlock->vertexShader, This->stateBlock, &compile_args);
2409         priv->current_vprogram_id = find_arb_vshader((IWineD3DVertexShaderImpl *) This->stateBlock->vertexShader, &compile_args);
2410
2411         /* Bind the vertex program */
2412         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id));
2413         checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id);");
2414
2415         /* Enable OpenGL vertex programs */
2416         glEnable(GL_VERTEX_PROGRAM_ARB);
2417         checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);");
2418         TRACE("(%p) : Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", This, priv->current_vprogram_id);
2419     } else if(GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
2420         priv->current_vprogram_id = 0;
2421         glDisable(GL_VERTEX_PROGRAM_ARB);
2422         checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)");
2423     }
2424
2425     if (usePS) {
2426         struct arb_ps_compile_args compile_args;
2427         TRACE("Using pixel shader\n");
2428         find_arb_ps_compile_args((IWineD3DPixelShaderImpl *) This->stateBlock->pixelShader, This->stateBlock, &compile_args);
2429         priv->current_fprogram_id = find_arb_pshader((IWineD3DPixelShaderImpl *) This->stateBlock->pixelShader,
2430                                                      &compile_args);
2431
2432         /* Bind the fragment program */
2433         GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id));
2434         checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id);");
2435
2436         if(!priv->use_arbfp_fixed_func) {
2437             /* Enable OpenGL fragment programs */
2438             glEnable(GL_FRAGMENT_PROGRAM_ARB);
2439             checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);");
2440         }
2441         TRACE("(%p) : Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n", This, priv->current_fprogram_id);
2442
2443         shader_arb_ps_local_constants(This);
2444     } else if(GL_SUPPORT(ARB_FRAGMENT_PROGRAM) && !priv->use_arbfp_fixed_func) {
2445         /* Disable only if we're not using arbfp fixed function fragment processing. If this is used,
2446          * keep GL_FRAGMENT_PROGRAM_ARB enabled, and the fixed function pipeline will bind the fixed function
2447          * replacement shader
2448          */
2449         glDisable(GL_FRAGMENT_PROGRAM_ARB);
2450         checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
2451         priv->current_fprogram_id = 0;
2452     }
2453 }
2454
2455 /* GL locking is done by the caller */
2456 static void shader_arb_select_depth_blt(IWineD3DDevice *iface, enum tex_types tex_type) {
2457     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
2458     struct shader_arb_priv *priv = This->shader_priv;
2459     GLuint *blt_fprogram = &priv->depth_blt_fprogram_id[tex_type];
2460     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
2461
2462     if (!priv->depth_blt_vprogram_id) priv->depth_blt_vprogram_id = create_arb_blt_vertex_program(gl_info);
2463     GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->depth_blt_vprogram_id));
2464     glEnable(GL_VERTEX_PROGRAM_ARB);
2465
2466     if (!*blt_fprogram) *blt_fprogram = create_arb_blt_fragment_program(gl_info, tex_type);
2467     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, *blt_fprogram));
2468     glEnable(GL_FRAGMENT_PROGRAM_ARB);
2469 }
2470
2471 /* GL locking is done by the caller */
2472 static void shader_arb_deselect_depth_blt(IWineD3DDevice *iface) {
2473     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
2474     struct shader_arb_priv *priv = This->shader_priv;
2475     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
2476
2477     if (priv->current_vprogram_id) {
2478         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id));
2479         checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vertexShader->prgId);");
2480
2481         glEnable(GL_VERTEX_PROGRAM_ARB);
2482         checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);");
2483
2484         TRACE("(%p) : Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", This, priv->current_vprogram_id);
2485     } else {
2486         glDisable(GL_VERTEX_PROGRAM_ARB);
2487         checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)");
2488     }
2489
2490     if (priv->current_fprogram_id) {
2491         GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id));
2492         checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, pixelShader->prgId);");
2493
2494         glEnable(GL_FRAGMENT_PROGRAM_ARB);
2495         checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);");
2496
2497         TRACE("(%p) : Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n", This, priv->current_fprogram_id);
2498     } else {
2499         glDisable(GL_FRAGMENT_PROGRAM_ARB);
2500         checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
2501     }
2502 }
2503
2504 static void shader_arb_destroy(IWineD3DBaseShader *iface) {
2505     IWineD3DBaseShaderImpl *baseShader = (IWineD3DBaseShaderImpl *) iface;
2506     const WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *)baseShader->baseShader.device)->adapter->gl_info;
2507
2508     if (shader_is_pshader_version(baseShader->baseShader.reg_maps.shader_version.type))
2509     {
2510         IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *) iface;
2511         struct arb_pshader_private *shader_data = This->backend_priv;
2512         UINT i;
2513
2514         if(!shader_data) return; /* This can happen if a shader was never compiled */
2515         ENTER_GL();
2516         for(i = 0; i < shader_data->num_gl_shaders; i++) {
2517             GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId));
2518             checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))");
2519         }
2520         LEAVE_GL();
2521         HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
2522         HeapFree(GetProcessHeap(), 0, shader_data);
2523         This->backend_priv = NULL;
2524     } else {
2525         IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *) iface;
2526         struct arb_vshader_private *shader_data = This->backend_priv;
2527         UINT i;
2528
2529         if(!shader_data) return; /* This can happen if a shader was never compiled */
2530         ENTER_GL();
2531         for(i = 0; i < shader_data->num_gl_shaders; i++) {
2532             GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId));
2533             checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))");
2534         }
2535         LEAVE_GL();
2536         HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
2537         HeapFree(GetProcessHeap(), 0, shader_data);
2538         This->backend_priv = NULL;
2539     }
2540 }
2541
2542 static HRESULT shader_arb_alloc(IWineD3DDevice *iface) {
2543     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
2544     This->shader_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_arb_priv));
2545     return WINED3D_OK;
2546 }
2547
2548 static void shader_arb_free(IWineD3DDevice *iface) {
2549     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
2550     const WineD3D_GL_Info *gl_info = &This->adapter->gl_info;
2551     struct shader_arb_priv *priv = This->shader_priv;
2552     int i;
2553
2554     ENTER_GL();
2555     if(priv->depth_blt_vprogram_id) {
2556         GL_EXTCALL(glDeleteProgramsARB(1, &priv->depth_blt_vprogram_id));
2557     }
2558     for (i = 0; i < tex_type_count; ++i) {
2559         if (priv->depth_blt_fprogram_id[i]) {
2560             GL_EXTCALL(glDeleteProgramsARB(1, &priv->depth_blt_fprogram_id[i]));
2561         }
2562     }
2563     LEAVE_GL();
2564
2565     HeapFree(GetProcessHeap(), 0, This->shader_priv);
2566 }
2567
2568 static BOOL shader_arb_dirty_const(IWineD3DDevice *iface) {
2569     return TRUE;
2570 }
2571
2572 static void shader_arb_get_caps(WINED3DDEVTYPE devtype, const WineD3D_GL_Info *gl_info, struct shader_caps *pCaps)
2573 {
2574     /* We don't have an ARB fixed function pipeline yet, so let the none backend set its caps,
2575      * then overwrite the shader specific ones
2576      */
2577     none_shader_backend.shader_get_caps(devtype, gl_info, pCaps);
2578
2579     if(GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
2580         pCaps->VertexShaderVersion = WINED3DVS_VERSION(1,1);
2581         TRACE_(d3d_caps)("Hardware vertex shader version 1.1 enabled (ARB_PROGRAM)\n");
2582         pCaps->MaxVertexShaderConst = GL_LIMITS(vshader_constantsF) - 1;
2583     }
2584
2585     if(GL_SUPPORT(ARB_FRAGMENT_PROGRAM)) {
2586         pCaps->PixelShaderVersion    = WINED3DPS_VERSION(1,4);
2587         pCaps->PixelShader1xMaxValue = 8.0;
2588         TRACE_(d3d_caps)("Hardware pixel shader version 1.4 enabled (ARB_PROGRAM)\n");
2589         pCaps->MaxPixelShaderConst = GL_LIMITS(pshader_constantsF);
2590     }
2591
2592     pCaps->VSClipping = FALSE; /* TODO: GL_NV_vertex_program2_option provides this */
2593 }
2594
2595 static BOOL shader_arb_color_fixup_supported(struct color_fixup_desc fixup)
2596 {
2597     if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
2598     {
2599         TRACE("Checking support for color_fixup:\n");
2600         dump_color_fixup_desc(fixup);
2601     }
2602
2603     /* We support everything except YUV conversions. */
2604     if (!is_yuv_fixup(fixup))
2605     {
2606         TRACE("[OK]\n");
2607         return TRUE;
2608     }
2609
2610     TRACE("[FAILED]\n");
2611     return FALSE;
2612 }
2613
2614 static void shader_arb_add_instruction_modifiers(const struct wined3d_shader_instruction *ins) {
2615     DWORD shift;
2616     char write_mask[20], regstr[50];
2617     SHADER_BUFFER *buffer = ins->ctx->buffer;
2618     BOOL is_color = FALSE;
2619     const struct wined3d_shader_dst_param *dst;
2620
2621     if (!ins->dst_count) return;
2622
2623     dst = &ins->dst[0];
2624     shift = dst->shift;
2625     if(shift == 0) return; /* Saturate alone is handled by the instructions */
2626
2627     shader_arb_get_write_mask(ins, dst, write_mask);
2628     shader_arb_get_register_name(ins, &dst->reg, regstr, &is_color);
2629
2630     /* Generate a line that does the output modifier computation
2631      * FIXME: _SAT vs shift? _SAT alone is already handled in the instructions, if this
2632      * maps problems in e.g. _d4_sat modify shader_arb_get_modifier
2633      */
2634     shader_addline(buffer, "MUL%s %s%s, %s, %s;\n", shader_arb_get_modifier(ins),
2635                    regstr, write_mask, regstr, shift_tab[shift]);
2636 }
2637
2638 static const SHADER_HANDLER shader_arb_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
2639 {
2640     /* WINED3DSIH_ABS           */ shader_hw_map2gl,
2641     /* WINED3DSIH_ADD           */ shader_hw_map2gl,
2642     /* WINED3DSIH_BEM           */ pshader_hw_bem,
2643     /* WINED3DSIH_BREAK         */ NULL,
2644     /* WINED3DSIH_BREAKC        */ NULL,
2645     /* WINED3DSIH_BREAKP        */ NULL,
2646     /* WINED3DSIH_CALL          */ NULL,
2647     /* WINED3DSIH_CALLNZ        */ NULL,
2648     /* WINED3DSIH_CMP           */ pshader_hw_cmp,
2649     /* WINED3DSIH_CND           */ pshader_hw_cnd,
2650     /* WINED3DSIH_CRS           */ shader_hw_map2gl,
2651     /* WINED3DSIH_DCL           */ NULL,
2652     /* WINED3DSIH_DEF           */ NULL,
2653     /* WINED3DSIH_DEFB          */ NULL,
2654     /* WINED3DSIH_DEFI          */ NULL,
2655     /* WINED3DSIH_DP2ADD        */ pshader_hw_dp2add,
2656     /* WINED3DSIH_DP3           */ shader_hw_map2gl,
2657     /* WINED3DSIH_DP4           */ shader_hw_map2gl,
2658     /* WINED3DSIH_DST           */ shader_hw_map2gl,
2659     /* WINED3DSIH_DSX           */ shader_hw_map2gl,
2660     /* WINED3DSIH_DSY           */ NULL,
2661     /* WINED3DSIH_ELSE          */ NULL,
2662     /* WINED3DSIH_ENDIF         */ NULL,
2663     /* WINED3DSIH_ENDLOOP       */ NULL,
2664     /* WINED3DSIH_ENDREP        */ NULL,
2665     /* WINED3DSIH_EXP           */ shader_hw_map2gl,
2666     /* WINED3DSIH_EXPP          */ shader_hw_map2gl,
2667     /* WINED3DSIH_FRC           */ shader_hw_map2gl,
2668     /* WINED3DSIH_IF            */ NULL,
2669     /* WINED3DSIH_IFC           */ NULL,
2670     /* WINED3DSIH_LABEL         */ NULL,
2671     /* WINED3DSIH_LIT           */ shader_hw_map2gl,
2672     /* WINED3DSIH_LOG           */ shader_hw_map2gl,
2673     /* WINED3DSIH_LOGP          */ shader_hw_map2gl,
2674     /* WINED3DSIH_LOOP          */ NULL,
2675     /* WINED3DSIH_LRP           */ shader_hw_map2gl,
2676     /* WINED3DSIH_M3x2          */ shader_hw_mnxn,
2677     /* WINED3DSIH_M3x3          */ shader_hw_mnxn,
2678     /* WINED3DSIH_M3x4          */ shader_hw_mnxn,
2679     /* WINED3DSIH_M4x3          */ shader_hw_mnxn,
2680     /* WINED3DSIH_M4x4          */ shader_hw_mnxn,
2681     /* WINED3DSIH_MAD           */ shader_hw_map2gl,
2682     /* WINED3DSIH_MAX           */ shader_hw_map2gl,
2683     /* WINED3DSIH_MIN           */ shader_hw_map2gl,
2684     /* WINED3DSIH_MOV           */ shader_hw_mov,
2685     /* WINED3DSIH_MOVA          */ shader_hw_mov,
2686     /* WINED3DSIH_MUL           */ shader_hw_map2gl,
2687     /* WINED3DSIH_NOP           */ shader_hw_nop,
2688     /* WINED3DSIH_NRM           */ shader_hw_nrm,
2689     /* WINED3DSIH_PHASE         */ NULL,
2690     /* WINED3DSIH_POW           */ shader_hw_map2gl,
2691     /* WINED3DSIH_RCP           */ shader_hw_rsq_rcp,
2692     /* WINED3DSIH_REP           */ NULL,
2693     /* WINED3DSIH_RET           */ NULL,
2694     /* WINED3DSIH_RSQ           */ shader_hw_rsq_rcp,
2695     /* WINED3DSIH_SETP          */ NULL,
2696     /* WINED3DSIH_SGE           */ shader_hw_map2gl,
2697     /* WINED3DSIH_SGN           */ shader_hw_sgn,
2698     /* WINED3DSIH_SINCOS        */ shader_hw_sincos,
2699     /* WINED3DSIH_SLT           */ shader_hw_map2gl,
2700     /* WINED3DSIH_SUB           */ shader_hw_map2gl,
2701     /* WINED3DSIH_TEX           */ pshader_hw_tex,
2702     /* WINED3DSIH_TEXBEM        */ pshader_hw_texbem,
2703     /* WINED3DSIH_TEXBEML       */ pshader_hw_texbem,
2704     /* WINED3DSIH_TEXCOORD      */ pshader_hw_texcoord,
2705     /* WINED3DSIH_TEXDEPTH      */ pshader_hw_texdepth,
2706     /* WINED3DSIH_TEXDP3        */ pshader_hw_texdp3,
2707     /* WINED3DSIH_TEXDP3TEX     */ pshader_hw_texdp3tex,
2708     /* WINED3DSIH_TEXKILL       */ pshader_hw_texkill,
2709     /* WINED3DSIH_TEXLDD        */ NULL,
2710     /* WINED3DSIH_TEXLDL        */ NULL,
2711     /* WINED3DSIH_TEXM3x2DEPTH  */ pshader_hw_texm3x2depth,
2712     /* WINED3DSIH_TEXM3x2PAD    */ pshader_hw_texm3x2pad,
2713     /* WINED3DSIH_TEXM3x2TEX    */ pshader_hw_texm3x2tex,
2714     /* WINED3DSIH_TEXM3x3       */ pshader_hw_texm3x3,
2715     /* WINED3DSIH_TEXM3x3DIFF   */ NULL,
2716     /* WINED3DSIH_TEXM3x3PAD    */ pshader_hw_texm3x3pad,
2717     /* WINED3DSIH_TEXM3x3SPEC   */ pshader_hw_texm3x3spec,
2718     /* WINED3DSIH_TEXM3x3TEX    */ pshader_hw_texm3x3tex,
2719     /* WINED3DSIH_TEXM3x3VSPEC  */ pshader_hw_texm3x3vspec,
2720     /* WINED3DSIH_TEXREG2AR     */ pshader_hw_texreg2ar,
2721     /* WINED3DSIH_TEXREG2GB     */ pshader_hw_texreg2gb,
2722     /* WINED3DSIH_TEXREG2RGB    */ pshader_hw_texreg2rgb,
2723 };
2724
2725 static inline BOOL get_bool_const(const struct wined3d_shader_instruction *ins, IWineD3DBaseShaderImpl *This, DWORD idx)
2726 {
2727     BOOL vshader = shader_is_vshader_version(This->baseShader.reg_maps.shader_version.type);
2728     WORD bools = 0;
2729     WORD flag = (1 << idx);
2730     const local_constant *constant;
2731     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2732
2733     if(This->baseShader.reg_maps.local_bool_consts & flag)
2734     {
2735         /* What good is a if(bool) with a hardcoded local constant? I don't know, but handle it */
2736         LIST_FOR_EACH_ENTRY(constant, &This->baseShader.constantsB, local_constant, entry)
2737         {
2738             if (constant->idx == idx)
2739             {
2740                 return constant->value[0];
2741             }
2742         }
2743         ERR("Local constant not found\n");
2744         return FALSE;
2745     }
2746     else
2747     {
2748         if(vshader) bools = priv->cur_vs_args->bools;
2749         else bools = priv->cur_ps_args->bools;
2750         return bools & flag;
2751     }
2752 }
2753
2754 static void shader_arb_handle_instruction(const struct wined3d_shader_instruction *ins) {
2755     SHADER_HANDLER hw_fct;
2756     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2757     IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
2758     struct if_frame *if_frame;
2759     SHADER_BUFFER *buffer = ins->ctx->buffer;
2760
2761     /* boolean if */
2762     if(ins->handler_idx == WINED3DSIH_IF)
2763     {
2764         if_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*if_frame));
2765         list_add_head(&priv->if_frames, &if_frame->entry);
2766
2767         if(!priv->muted && get_bool_const(ins, This, ins->src[0].reg.idx) == FALSE)
2768         {
2769             shader_addline(buffer, "#if(FALSE){\n");
2770             priv->muted = TRUE;
2771             if_frame->muting = TRUE;
2772         }
2773         else shader_addline(buffer, "#if(TRUE) {\n");
2774
2775         return; /* Instruction is handled */
2776     }
2777     else if(ins->handler_idx == WINED3DSIH_IFC)
2778     {
2779         /* IF(bool) and if_cond(a, b) use the same ELSE and ENDIF tokens */
2780         if_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*if_frame));
2781         if_frame->ifc = TRUE;
2782         list_add_head(&priv->if_frames, &if_frame->entry);
2783     }
2784     else if(ins->handler_idx == WINED3DSIH_ELSE)
2785     {
2786         struct list *e = list_head(&priv->if_frames);
2787         if_frame = LIST_ENTRY(e, struct if_frame, entry);
2788
2789         if(if_frame->ifc == FALSE)
2790         {
2791             shader_addline(buffer, "#} else {\n");
2792             if(!priv->muted && !if_frame->muting)
2793             {
2794                 priv->muted = TRUE;
2795                 if_frame->muting = TRUE;
2796             }
2797             else if(if_frame->muting) priv->muted = FALSE;
2798             return; /* Instruction is handled. */
2799         }
2800         /* In case of an ifc, generate a HW shader instruction */
2801     }
2802     else if(ins->handler_idx == WINED3DSIH_ENDIF)
2803     {
2804         struct list *e = list_head(&priv->if_frames);
2805         if_frame = LIST_ENTRY(e, struct if_frame, entry);
2806
2807         if(!if_frame->ifc)
2808         {
2809             shader_addline(buffer, "#} endif\n");
2810             if(if_frame->muting) priv->muted = FALSE;
2811             list_remove(&if_frame->entry);
2812             HeapFree(GetProcessHeap(), 0, if_frame);
2813             return; /* Instruction is handled */
2814         }
2815         else
2816         {
2817             list_remove(&if_frame->entry);
2818             HeapFree(GetProcessHeap(), 0, if_frame);
2819             /* ifc - generate a hw endif */
2820         }
2821     }
2822
2823     if(priv->muted) return;
2824
2825     /* Select handler */
2826     hw_fct = shader_arb_instruction_handler_table[ins->handler_idx];
2827
2828     /* Unhandled opcode */
2829     if (!hw_fct)
2830     {
2831         FIXME("Backend can't handle opcode %#x\n", ins->handler_idx);
2832         return;
2833     }
2834     hw_fct(ins);
2835
2836     shader_arb_add_instruction_modifiers(ins);
2837 }
2838
/* Function table through which the ARB vertex/fragment program shader backend
 * is exposed. The initializer is positional, so the order of the entries has
 * to match the member order of shader_backend_t. */
const shader_backend_t arb_program_shader_backend = {
    shader_arb_handle_instruction,
    shader_arb_select,
    shader_arb_select_depth_blt,
    shader_arb_deselect_depth_blt,
    shader_arb_update_float_vertex_constants,
    shader_arb_update_float_pixel_constants,
    shader_arb_load_constants,
    shader_arb_load_np2fixup_constants,
    shader_arb_destroy,
    shader_arb_alloc,
    shader_arb_free,
    shader_arb_dirty_const,
    shader_arb_get_caps,
    shader_arb_color_fixup_supported,
};
2855
/* ARB_fragment_program fixed function pipeline replacement definitions.
 *
 * Layout of the program.env[] slots used by the fixed function replacement:
 *   0        texture factor
 *   1        specular enable
 *   2 - 9    per stage constants
 *   10 - 17  per stage bump environment matrices
 *   18 - 25  per stage luminance scale / offset
 *
 * The macro argument is parenthesized so that expressions like "a ? b : c"
 * or "x << 1" can be passed as the stage index. */
#define ARB_FFP_CONST_TFACTOR           0
#define ARB_FFP_CONST_SPECULAR_ENABLE   ((ARB_FFP_CONST_TFACTOR) + 1)
#define ARB_FFP_CONST_CONSTANT(i)       ((ARB_FFP_CONST_SPECULAR_ENABLE) + 1 + (i))
#define ARB_FFP_CONST_BUMPMAT(i)        ((ARB_FFP_CONST_CONSTANT(7)) + 1 + (i))
#define ARB_FFP_CONST_LUMINANCE(i)      ((ARB_FFP_CONST_BUMPMAT(7)) + 1 + (i))
2862
/* One generated fixed function replacement fragment program. Entries of this
 * type are released through arbfp_free_ffpshader() when the fragment_shaders
 * hash table is destroyed. */
struct arbfp_ffp_desc
{
    /* NOTE(review): presumably has to stay the first member so the entry can be
     * handled as a struct ffp_frag_desc by the generic ffp code - confirm */
    struct ffp_frag_desc parent;
    /* GL program name, deleted with glDeleteProgramsARB() */
    GLuint shader;
    /* NOTE(review): looks like the number of texture stages the program reads - confirm */
    unsigned int num_textures_used;
};
2869
2870 static void arbfp_enable(IWineD3DDevice *iface, BOOL enable) {
2871     ENTER_GL();
2872     if(enable) {
2873         glEnable(GL_FRAGMENT_PROGRAM_ARB);
2874         checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)");
2875     } else {
2876         glDisable(GL_FRAGMENT_PROGRAM_ARB);
2877         checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
2878     }
2879     LEAVE_GL();
2880 }
2881
2882 static HRESULT arbfp_alloc(IWineD3DDevice *iface) {
2883     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *) iface;
2884     struct shader_arb_priv *priv;
2885     /* Share private data between the shader backend and the pipeline replacement, if both
2886      * are the arb implementation. This is needed to figure out whether ARBfp should be disabled
2887      * if no pixel shader is bound or not
2888      */
2889     if(This->shader_backend == &arb_program_shader_backend) {
2890         This->fragment_priv = This->shader_priv;
2891     } else {
2892         This->fragment_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_arb_priv));
2893         if(!This->fragment_priv) return E_OUTOFMEMORY;
2894     }
2895     priv = This->fragment_priv;
2896     priv->fragment_shaders = hash_table_create(ffp_frag_program_key_hash, ffp_frag_program_key_compare);
2897     priv->use_arbfp_fixed_func = TRUE;
2898     return WINED3D_OK;
2899 }
2900
2901 static void arbfp_free_ffpshader(void *value, void *gli) {
2902     const WineD3D_GL_Info *gl_info = gli;
2903     struct arbfp_ffp_desc *entry_arb = value;
2904
2905     ENTER_GL();
2906     GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader));
2907     checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)");
2908     HeapFree(GetProcessHeap(), 0, entry_arb);
2909     LEAVE_GL();
2910 }
2911
2912 static void arbfp_free(IWineD3DDevice *iface) {
2913     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *) iface;
2914     struct shader_arb_priv *priv = This->fragment_priv;
2915
2916     hash_table_destroy(priv->fragment_shaders, arbfp_free_ffpshader, &This->adapter->gl_info);
2917     priv->use_arbfp_fixed_func = FALSE;
2918
2919     if(This->shader_backend != &arb_program_shader_backend) {
2920         HeapFree(GetProcessHeap(), 0, This->fragment_priv);
2921     }
2922 }
2923
2924 static void arbfp_get_caps(WINED3DDEVTYPE devtype, const WineD3D_GL_Info *gl_info, struct fragment_caps *caps)
2925 {
2926     caps->TextureOpCaps =  WINED3DTEXOPCAPS_DISABLE                     |
2927                            WINED3DTEXOPCAPS_SELECTARG1                  |
2928                            WINED3DTEXOPCAPS_SELECTARG2                  |
2929                            WINED3DTEXOPCAPS_MODULATE4X                  |
2930                            WINED3DTEXOPCAPS_MODULATE2X                  |
2931                            WINED3DTEXOPCAPS_MODULATE                    |
2932                            WINED3DTEXOPCAPS_ADDSIGNED2X                 |
2933                            WINED3DTEXOPCAPS_ADDSIGNED                   |
2934                            WINED3DTEXOPCAPS_ADD                         |
2935                            WINED3DTEXOPCAPS_SUBTRACT                    |
2936                            WINED3DTEXOPCAPS_ADDSMOOTH                   |
2937                            WINED3DTEXOPCAPS_BLENDCURRENTALPHA           |
2938                            WINED3DTEXOPCAPS_BLENDFACTORALPHA            |
2939                            WINED3DTEXOPCAPS_BLENDTEXTUREALPHA           |
2940                            WINED3DTEXOPCAPS_BLENDDIFFUSEALPHA           |
2941                            WINED3DTEXOPCAPS_BLENDTEXTUREALPHAPM         |
2942                            WINED3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR      |
2943                            WINED3DTEXOPCAPS_MODULATECOLOR_ADDALPHA      |
2944                            WINED3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA   |
2945                            WINED3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR   |
2946                            WINED3DTEXOPCAPS_DOTPRODUCT3                 |
2947                            WINED3DTEXOPCAPS_MULTIPLYADD                 |
2948                            WINED3DTEXOPCAPS_LERP                        |
2949                            WINED3DTEXOPCAPS_BUMPENVMAP                  |
2950                            WINED3DTEXOPCAPS_BUMPENVMAPLUMINANCE;
2951
2952     /* TODO: Implement WINED3DTEXOPCAPS_PREMODULATE */
2953
2954     caps->MaxTextureBlendStages   = 8;
2955     caps->MaxSimultaneousTextures = min(GL_LIMITS(fragment_samplers), 8);
2956
2957     caps->PrimitiveMiscCaps |= WINED3DPMISCCAPS_TSSARGTEMP;
2958 }
2959 #undef GLINFO_LOCATION
2960
2961 #define GLINFO_LOCATION stateblock->wineD3DDevice->adapter->gl_info
2962 static void state_texfactor_arbfp(DWORD state, IWineD3DStateBlockImpl *stateblock, WineD3DContext *context) {
2963     float col[4];
2964     IWineD3DDeviceImpl *device = stateblock->wineD3DDevice;
2965
2966     /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite
2967      * application provided constants
2968      */
2969     if(device->shader_backend == &arb_program_shader_backend) {
2970         if (use_ps(stateblock)) return;
2971
2972         device = stateblock->wineD3DDevice;
2973         device->activeContext->pshader_const_dirty[ARB_FFP_CONST_TFACTOR] = 1;
2974         device->highest_dirty_ps_const = max(device->highest_dirty_ps_const, ARB_FFP_CONST_TFACTOR + 1);
2975     }
2976
2977     D3DCOLORTOGLFLOAT4(stateblock->renderState[WINED3DRS_TEXTUREFACTOR], col);
2978     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, col));
2979     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, col)");
2980
2981 }
2982
2983 static void state_arb_specularenable(DWORD state, IWineD3DStateBlockImpl *stateblock, WineD3DContext *context) {
2984     float col[4];
2985     IWineD3DDeviceImpl *device = stateblock->wineD3DDevice;
2986
2987     /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite
2988      * application provided constants
2989      */
2990     if(device->shader_backend == &arb_program_shader_backend) {
2991         if (use_ps(stateblock)) return;
2992
2993         device = stateblock->wineD3DDevice;
2994         device->activeContext->pshader_const_dirty[ARB_FFP_CONST_SPECULAR_ENABLE] = 1;
2995         device->highest_dirty_ps_const = max(device->highest_dirty_ps_const, ARB_FFP_CONST_SPECULAR_ENABLE + 1);
2996     }
2997
2998     if(stateblock->renderState[WINED3DRS_SPECULARENABLE]) {
2999         /* The specular color has no alpha */
3000         col[0] = 1.0; col[1] = 1.0;
3001         col[2] = 1.0; col[3] = 0.0;
3002     } else {
3003         col[0] = 0.0; col[1] = 0.0;
3004         col[2] = 0.0; col[3] = 0.0;
3005     }
3006     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col));
3007     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)");
3008 }
3009
3010 static void set_bumpmat_arbfp(DWORD state, IWineD3DStateBlockImpl *stateblock, WineD3DContext *context) {
3011     DWORD stage = (state - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1);
3012     IWineD3DDeviceImpl *device = stateblock->wineD3DDevice;
3013     float mat[2][2];
3014
3015     if (use_ps(stateblock))
3016     {
3017         if(stage != 0 &&
3018            ((IWineD3DPixelShaderImpl *) stateblock->pixelShader)->baseShader.reg_maps.bumpmat[stage]) {
3019             /* The pixel shader has to know the bump env matrix. Do a constants update if it isn't scheduled
3020              * anyway
3021              */
3022             if(!isStateDirty(context, STATE_PIXELSHADERCONSTANT)) {
3023                 device->StateTable[STATE_PIXELSHADERCONSTANT].apply(STATE_PIXELSHADERCONSTANT, stateblock, context);
3024             }
3025         }
3026
3027         if(device->shader_backend == &arb_program_shader_backend) {
3028             /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants */
3029             return;
3030         }
3031     } else if(device->shader_backend == &arb_program_shader_backend) {
3032         device->activeContext->pshader_const_dirty[ARB_FFP_CONST_BUMPMAT(stage)] = 1;
3033         device->highest_dirty_ps_const = max(device->highest_dirty_ps_const, ARB_FFP_CONST_BUMPMAT(stage) + 1);
3034     }
3035
3036     mat[0][0] = *((float *) &stateblock->textureState[stage][WINED3DTSS_BUMPENVMAT00]);
3037     mat[0][1] = *((float *) &stateblock->textureState[stage][WINED3DTSS_BUMPENVMAT01]);
3038     mat[1][0] = *((float *) &stateblock->textureState[stage][WINED3DTSS_BUMPENVMAT10]);
3039     mat[1][1] = *((float *) &stateblock->textureState[stage][WINED3DTSS_BUMPENVMAT11]);
3040
3041     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0]));
3042     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])");
3043 }
3044
3045 static void tex_bumpenvlum_arbfp(DWORD state, IWineD3DStateBlockImpl *stateblock, WineD3DContext *context) {
3046     DWORD stage = (state - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1);
3047     IWineD3DDeviceImpl *device = stateblock->wineD3DDevice;
3048     float param[4];
3049
3050     if (use_ps(stateblock))
3051     {
3052         if(stage != 0 &&
3053            ((IWineD3DPixelShaderImpl *) stateblock->pixelShader)->baseShader.reg_maps.luminanceparams[stage]) {
3054             /* The pixel shader has to know the luminance offset. Do a constants update if it
3055              * isn't scheduled anyway
3056              */
3057             if(!isStateDirty(context, STATE_PIXELSHADERCONSTANT)) {
3058                 device->StateTable[STATE_PIXELSHADERCONSTANT].apply(STATE_PIXELSHADERCONSTANT, stateblock, context);
3059             }
3060         }
3061
3062         if(device->shader_backend == &arb_program_shader_backend) {
3063             /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants */
3064             return;
3065         }
3066     } else if(device->shader_backend == &arb_program_shader_backend) {
3067         device->activeContext->pshader_const_dirty[ARB_FFP_CONST_LUMINANCE(stage)] = 1;
3068         device->highest_dirty_ps_const = max(device->highest_dirty_ps_const, ARB_FFP_CONST_LUMINANCE(stage) + 1);
3069     }
3070
3071     param[0] = *((float *) &stateblock->textureState[stage][WINED3DTSS_BUMPENVLSCALE]);
3072     param[1] = *((float *) &stateblock->textureState[stage][WINED3DTSS_BUMPENVLOFFSET]);
3073     param[2] = 0.0;
3074     param[3] = 0.0;
3075
3076     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param));
3077     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)");
3078 }
3079
3080 static const char *get_argreg(SHADER_BUFFER *buffer, DWORD argnum, unsigned int stage, DWORD arg) {
3081     const char *ret;
3082
3083     if(arg == ARG_UNUSED) return "unused"; /* This is the marker for unused registers */
3084
3085     switch(arg & WINED3DTA_SELECTMASK) {
3086         case WINED3DTA_DIFFUSE:
3087             ret = "fragment.color.primary"; break;
3088
3089         case WINED3DTA_CURRENT:
3090             if(stage == 0) ret = "fragment.color.primary";
3091             else ret = "ret";
3092             break;
3093
3094         case WINED3DTA_TEXTURE:
3095             switch(stage) {
3096                 case 0: ret = "tex0"; break;
3097                 case 1: ret = "tex1"; break;
3098                 case 2: ret = "tex2"; break;
3099                 case 3: ret = "tex3"; break;
3100                 case 4: ret = "tex4"; break;
3101                 case 5: ret = "tex5"; break;
3102                 case 6: ret = "tex6"; break;
3103                 case 7: ret = "tex7"; break;
3104                 default: ret = "unknown texture";
3105             }
3106             break;
3107
3108         case WINED3DTA_TFACTOR:
3109             ret = "tfactor"; break;
3110
3111         case WINED3DTA_SPECULAR:
3112             ret = "fragment.color.secondary"; break;
3113
3114         case WINED3DTA_TEMP:
3115             ret = "tempreg"; break;
3116
3117         case WINED3DTA_CONSTANT:
3118             FIXME("Implement perstage constants\n");
3119             switch(stage) {
3120                 case 0: ret = "const0"; break;
3121                 case 1: ret = "const1"; break;
3122                 case 2: ret = "const2"; break;
3123                 case 3: ret = "const3"; break;
3124                 case 4: ret = "const4"; break;
3125                 case 5: ret = "const5"; break;
3126                 case 6: ret = "const6"; break;
3127                 case 7: ret = "const7"; break;
3128                 default: ret = "unknown constant";
3129             }
3130             break;
3131
3132         default:
3133             return "unknown";
3134     }
3135
3136     if(arg & WINED3DTA_COMPLEMENT) {
3137         shader_addline(buffer, "SUB arg%u, const.x, %s;\n", argnum, ret);
3138         if(argnum == 0) ret = "arg0";
3139         if(argnum == 1) ret = "arg1";
3140         if(argnum == 2) ret = "arg2";
3141     }
3142     if(arg & WINED3DTA_ALPHAREPLICATE) {
3143         shader_addline(buffer, "MOV arg%u, %s.w;\n", argnum, ret);
3144         if(argnum == 0) ret = "arg0";
3145         if(argnum == 1) ret = "arg1";
3146         if(argnum == 2) ret = "arg2";
3147     }
3148     return ret;
3149 }
3150
3151 static void gen_ffp_instr(SHADER_BUFFER *buffer, unsigned int stage, BOOL color, BOOL alpha,
3152                           DWORD dst, DWORD op, DWORD dw_arg0, DWORD dw_arg1, DWORD dw_arg2) {
3153     const char *dstmask, *dstreg, *arg0, *arg1, *arg2;
3154     unsigned int mul = 1;
3155     BOOL mul_final_dest = FALSE;
3156
3157     if(color && alpha) dstmask = "";
3158     else if(color) dstmask = ".xyz";
3159     else dstmask = ".w";
3160
3161     if(dst == tempreg) dstreg = "tempreg";
3162     else dstreg = "ret";
3163
3164     arg0 = get_argreg(buffer, 0, stage, dw_arg0);
3165     arg1 = get_argreg(buffer, 1, stage, dw_arg1);
3166     arg2 = get_argreg(buffer, 2, stage, dw_arg2);
3167
3168     switch(op) {
3169         case WINED3DTOP_DISABLE:
3170             if(stage == 0) shader_addline(buffer, "MOV %s%s, fragment.color.primary;\n", dstreg, dstmask);
3171             break;
3172
3173         case WINED3DTOP_SELECTARG2:
3174             arg1 = arg2;
3175         case WINED3DTOP_SELECTARG1:
3176             shader_addline(buffer, "MOV %s%s, %s;\n", dstreg, dstmask, arg1);
3177             break;
3178
3179         case WINED3DTOP_MODULATE4X:
3180             mul = 2;
3181         case WINED3DTOP_MODULATE2X:
3182             mul *= 2;
3183             if(strcmp(dstreg, "result.color") == 0) {
3184                 dstreg = "ret";
3185                 mul_final_dest = TRUE;
3186             }
3187         case WINED3DTOP_MODULATE:
3188             shader_addline(buffer, "MUL %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2);
3189             break;
3190
3191         case WINED3DTOP_ADDSIGNED2X:
3192             mul = 2;
3193             if(strcmp(dstreg, "result.color") == 0) {
3194                 dstreg = "ret";
3195                 mul_final_dest = TRUE;
3196             }
3197         case WINED3DTOP_ADDSIGNED:
3198             shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2);
3199             arg2 = "arg2";
3200         case WINED3DTOP_ADD:
3201             shader_addline(buffer, "ADD_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2);
3202             break;
3203
3204         case WINED3DTOP_SUBTRACT:
3205             shader_addline(buffer, "SUB_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2);
3206             break;
3207
3208         case WINED3DTOP_ADDSMOOTH:
3209             shader_addline(buffer, "SUB arg1, const.x, %s;\n", arg1);
3210             shader_addline(buffer, "MAD_SAT %s%s, arg1, %s, %s;\n", dstreg, dstmask, arg2, arg1);
3211             break;
3212
3213         case WINED3DTOP_BLENDCURRENTALPHA:
3214             arg0 = get_argreg(buffer, 0, stage, WINED3DTA_CURRENT);
3215             shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
3216             break;
3217         case WINED3DTOP_BLENDFACTORALPHA:
3218             arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TFACTOR);
3219             shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
3220             break;
3221         case WINED3DTOP_BLENDTEXTUREALPHA:
3222             arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE);
3223             shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
3224             break;
3225         case WINED3DTOP_BLENDDIFFUSEALPHA:
3226             arg0 = get_argreg(buffer, 0, stage, WINED3DTA_DIFFUSE);
3227             shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
3228             break;
3229
3230         case WINED3DTOP_BLENDTEXTUREALPHAPM:
3231             arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE);
3232             shader_addline(buffer, "SUB arg0.w, const.x, %s.w;\n", arg0);
3233             shader_addline(buffer, "MAD_SAT %s%s, %s, arg0.w, %s;\n", dstreg, dstmask, arg2, arg1);
3234             break;
3235
3236         /* D3DTOP_PREMODULATE ???? */
3237
3238         case WINED3DTOP_MODULATEINVALPHA_ADDCOLOR:
3239             shader_addline(buffer, "SUB arg0.w, const.x, %s;\n", arg1);
3240             shader_addline(buffer, "MAD_SAT %s%s, arg0.w, %s, %s;\n", dstreg, dstmask, arg2, arg1);
3241             break;
3242         case WINED3DTOP_MODULATEALPHA_ADDCOLOR:
3243             shader_addline(buffer, "MAD_SAT %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg1);
3244             break;
3245         case WINED3DTOP_MODULATEINVCOLOR_ADDALPHA:
3246             shader_addline(buffer, "SUB arg0, const.x, %s;\n", arg1);
3247             shader_addline(buffer, "MAD_SAT %s%s, arg0, %s, %s.w;\n", dstreg, dstmask, arg2, arg1);
3248             break;
3249         case WINED3DTOP_MODULATECOLOR_ADDALPHA:
3250             shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s.w;\n", dstreg, dstmask, arg1, arg2, arg1);
3251             break;
3252
3253         case WINED3DTOP_DOTPRODUCT3:
3254             mul = 4;
3255             if(strcmp(dstreg, "result.color") == 0) {
3256                 dstreg = "ret";
3257                 mul_final_dest = TRUE;
3258             }
3259             shader_addline(buffer, "SUB arg1, %s, const.w;\n", arg1);
3260             shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2);
3261             shader_addline(buffer, "DP3_SAT %s%s, arg1, arg2;\n", dstreg, dstmask);
3262             break;
3263
3264         case WINED3DTOP_MULTIPLYADD:
3265             shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg0);
3266             break;
3267
3268         case WINED3DTOP_LERP:
3269             /* The msdn is not quite right here */
3270             shader_addline(buffer, "LRP %s%s, %s, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
3271             break;
3272
3273         case WINED3DTOP_BUMPENVMAP:
3274         case WINED3DTOP_BUMPENVMAPLUMINANCE:
3275             /* Those are handled in the first pass of the shader(generation pass 1 and 2) already */
3276             break;
3277
3278         default:
3279             FIXME("Unhandled texture op %08x\n", op);
3280     }
3281
3282     if(mul == 2) {
3283         shader_addline(buffer, "MUL_SAT %s%s, %s, const.y;\n", mul_final_dest ? "result.color" : dstreg, dstmask, dstreg);
3284     } else if(mul == 4) {
3285         shader_addline(buffer, "MUL_SAT %s%s, %s, const.z;\n", mul_final_dest ? "result.color" : dstreg, dstmask, dstreg);
3286     }
3287 }
3288
3289 /* The stateblock is passed for GLINFO_LOCATION */
3290 static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, IWineD3DStateBlockImpl *stateblock)
3291 {
3292     unsigned int stage;
3293     SHADER_BUFFER buffer;
3294     BOOL tex_read[MAX_TEXTURES] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE};
3295     BOOL bump_used[MAX_TEXTURES] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE};
3296     BOOL luminance_used[MAX_TEXTURES] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE};
3297     const char *textype;
3298     const char *instr, *sat;
3299     char colorcor_dst[8];
3300     GLuint ret;
3301     DWORD arg0, arg1, arg2;
3302     BOOL tempreg_used = FALSE, tfactor_used = FALSE;
3303     BOOL op_equal;
3304     const char *final_combiner_src = "ret";
3305
3306     /* Find out which textures are read */
3307     for(stage = 0; stage < MAX_TEXTURES; stage++) {
3308         if(settings->op[stage].cop == WINED3DTOP_DISABLE) break;
3309         arg0 = settings->op[stage].carg0 & WINED3DTA_SELECTMASK;
3310         arg1 = settings->op[stage].carg1 & WINED3DTA_SELECTMASK;
3311         arg2 = settings->op[stage].carg2 & WINED3DTA_SELECTMASK;
3312         if(arg0 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE;
3313         if(arg1 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE;
3314         if(arg2 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE;
3315
3316         if(settings->op[stage].cop == WINED3DTOP_BLENDTEXTUREALPHA) tex_read[stage] = TRUE;
3317         if(settings->op[stage].cop == WINED3DTOP_BLENDTEXTUREALPHAPM) tex_read[stage] = TRUE;
3318         if(settings->op[stage].cop == WINED3DTOP_BUMPENVMAP) {
3319             bump_used[stage] = TRUE;
3320             tex_read[stage] = TRUE;
3321         }
3322         if(settings->op[stage].cop == WINED3DTOP_BUMPENVMAPLUMINANCE) {
3323             bump_used[stage] = TRUE;
3324             tex_read[stage] = TRUE;
3325             luminance_used[stage] = TRUE;
3326         } else if(settings->op[stage].cop == WINED3DTOP_BLENDFACTORALPHA) {
3327             tfactor_used = TRUE;
3328         }
3329
3330         if(arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) {
3331             tfactor_used = TRUE;
3332         }
3333
3334         if(settings->op[stage].dst == tempreg) tempreg_used = TRUE;
3335         if(arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) {
3336             tempreg_used = TRUE;
3337         }
3338
3339         if(settings->op[stage].aop == WINED3DTOP_DISABLE) continue;
3340         arg0 = settings->op[stage].aarg0 & WINED3DTA_SELECTMASK;
3341         arg1 = settings->op[stage].aarg1 & WINED3DTA_SELECTMASK;
3342         arg2 = settings->op[stage].aarg2 & WINED3DTA_SELECTMASK;
3343         if(arg0 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE;
3344         if(arg1 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE;
3345         if(arg2 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE;
3346
3347         if(arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) {
3348             tempreg_used = TRUE;
3349         }
3350         if(arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) {
3351             tfactor_used = TRUE;
3352         }
3353     }
3354
3355     /* Shader header */
3356     shader_buffer_init(&buffer);
3357
3358     shader_addline(&buffer, "!!ARBfp1.0\n");
3359
3360     switch(settings->fog) {
3361         case FOG_OFF:                                                         break;
3362         case FOG_LINEAR: shader_addline(&buffer, "OPTION ARB_fog_linear;\n"); break;
3363         case FOG_EXP:    shader_addline(&buffer, "OPTION ARB_fog_exp;\n");    break;
3364         case FOG_EXP2:   shader_addline(&buffer, "OPTION ARB_fog_exp2;\n");   break;
3365         default: FIXME("Unexpected fog setting %d\n", settings->fog);
3366     }
3367
3368     shader_addline(&buffer, "PARAM const = {1, 2, 4, 0.5};\n");
3369     shader_addline(&buffer, "TEMP TMP;\n");
3370     shader_addline(&buffer, "TEMP ret;\n");
3371     if(tempreg_used || settings->sRGB_write) shader_addline(&buffer, "TEMP tempreg;\n");
3372     shader_addline(&buffer, "TEMP arg0;\n");
3373     shader_addline(&buffer, "TEMP arg1;\n");
3374     shader_addline(&buffer, "TEMP arg2;\n");
3375     for(stage = 0; stage < MAX_TEXTURES; stage++) {
3376         if(!tex_read[stage]) continue;
3377         shader_addline(&buffer, "TEMP tex%u;\n", stage);
3378         if(!bump_used[stage]) continue;
3379         shader_addline(&buffer, "PARAM bumpmat%u = program.env[%u];\n", stage, ARB_FFP_CONST_BUMPMAT(stage));
3380         if(!luminance_used[stage]) continue;
3381         shader_addline(&buffer, "PARAM luminance%u = program.env[%u];\n", stage, ARB_FFP_CONST_LUMINANCE(stage));
3382     }
3383     if(tfactor_used) {
3384         shader_addline(&buffer, "PARAM tfactor = program.env[%u];\n", ARB_FFP_CONST_TFACTOR);
3385     }
3386         shader_addline(&buffer, "PARAM specular_enable = program.env[%u];\n", ARB_FFP_CONST_SPECULAR_ENABLE);
3387
3388     if(settings->sRGB_write) {
3389         shader_addline(&buffer, "PARAM srgb_consts1 = {%f, %f, %f, %f};\n",
3390                        srgb_mul_low, srgb_cmp, srgb_pow, srgb_mul_high);
3391         shader_addline(&buffer, "PARAM srgb_consts2 = {%f, %f, %f, %f};\n",
3392                        srgb_sub_high, 0.0, 0.0, 0.0);
3393     }
3394
3395     /* Generate texture sampling instructions) */
3396     for(stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3DTOP_DISABLE; stage++) {
3397         if(!tex_read[stage]) continue;
3398
3399         switch(settings->op[stage].tex_type) {
3400             case tex_1d:                    textype = "1D";     break;
3401             case tex_2d:                    textype = "2D";     break;
3402             case tex_3d:                    textype = "3D";     break;
3403             case tex_cube:                  textype = "CUBE";   break;
3404             case tex_rect:                  textype = "RECT";   break;
3405             default: textype = "unexpected_textype";   break;
3406         }
3407
3408         if(settings->op[stage].cop == WINED3DTOP_BUMPENVMAP ||
3409            settings->op[stage].cop == WINED3DTOP_BUMPENVMAPLUMINANCE) {
3410             sat = "";
3411         } else {
3412             sat = "_SAT";
3413         }
3414
3415         if(settings->op[stage].projected == proj_none) {
3416             instr = "TEX";
3417         } else if(settings->op[stage].projected == proj_count4 ||
3418                   settings->op[stage].projected == proj_count3) {
3419             instr = "TXP";
3420         } else {
3421             FIXME("Unexpected projection mode %d\n", settings->op[stage].projected);
3422             instr = "TXP";
3423         }
3424
3425         if(stage > 0 &&
3426            (settings->op[stage - 1].cop == WINED3DTOP_BUMPENVMAP ||
3427             settings->op[stage - 1].cop == WINED3DTOP_BUMPENVMAPLUMINANCE)) {
3428             shader_addline(&buffer, "SWZ arg1, bumpmat%u, x, z, 0, 0;\n", stage - 1);
3429             shader_addline(&buffer, "DP3 ret.x, arg1, tex%u;\n", stage - 1);
3430             shader_addline(&buffer, "SWZ arg1, bumpmat%u, y, w, 0, 0;\n", stage - 1);
3431             shader_addline(&buffer, "DP3 ret.y, arg1, tex%u;\n", stage - 1);
3432
3433             /* with projective textures, texbem only divides the static texture coord, not the displacement,
3434              * so multiply the displacement with the dividing parameter before passing it to TXP
3435              */
3436             if (settings->op[stage].projected != proj_none) {
3437                 if(settings->op[stage].projected == proj_count4) {
3438                     shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].w;\n", stage);
3439                     shader_addline(&buffer, "MUL ret.xyz, ret, fragment.texcoord[%u].w, fragment.texcoord[%u];\n", stage, stage);
3440                 } else {
3441                     shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].z;\n", stage);
3442                     shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].z, fragment.texcoord[%u];\n", stage, stage);
3443                 }
3444             } else {
3445                 shader_addline(&buffer, "ADD ret, ret, fragment.texcoord[%u];\n", stage);
3446             }
3447
3448             shader_addline(&buffer, "%s%s tex%u, ret, texture[%u], %s;\n",
3449                            instr, sat, stage, stage, textype);
3450             if(settings->op[stage - 1].cop == WINED3DTOP_BUMPENVMAPLUMINANCE) {
3451                 shader_addline(&buffer, "MAD_SAT ret.x, tex%u.z, luminance%u.x, luminance%u.y;\n",
3452                                stage - 1, stage - 1, stage - 1);
3453                 shader_addline(&buffer, "MUL tex%u, tex%u, ret.x;\n", stage, stage);
3454             }
3455         } else if(settings->op[stage].projected == proj_count3) {
3456             shader_addline(&buffer, "MOV ret, fragment.texcoord[%u];\n", stage);
3457             shader_addline(&buffer, "MOV ret.w, ret.z;\n");
3458             shader_addline(&buffer, "%s%s tex%u, ret, texture[%u], %s;\n",
3459                             instr, sat, stage, stage, textype);
3460         } else {
3461             shader_addline(&buffer, "%s%s tex%u, fragment.texcoord[%u], texture[%u], %s;\n",
3462                             instr, sat, stage, stage, stage, textype);
3463         }
3464
3465         sprintf(colorcor_dst, "tex%u", stage);
3466         gen_color_correction(&buffer, colorcor_dst, WINED3DSP_WRITEMASK_ALL, "const.x", "const.y",
3467                 settings->op[stage].color_fixup);
3468     }
3469
3470     /* Generate the main shader */
3471     for(stage = 0; stage < MAX_TEXTURES; stage++) {
3472         if(settings->op[stage].cop == WINED3DTOP_DISABLE) {
3473             if(stage == 0) {
3474                 final_combiner_src = "fragment.color.primary";
3475             }
3476             break;
3477         }
3478
3479         if(settings->op[stage].cop == WINED3DTOP_SELECTARG1 &&
3480            settings->op[stage].aop == WINED3DTOP_SELECTARG1) {
3481             op_equal = settings->op[stage].carg1 == settings->op[stage].aarg1;
3482         } else if(settings->op[stage].cop == WINED3DTOP_SELECTARG1 &&
3483                   settings->op[stage].aop == WINED3DTOP_SELECTARG2) {
3484             op_equal = settings->op[stage].carg1 == settings->op[stage].aarg2;
3485         } else if(settings->op[stage].cop == WINED3DTOP_SELECTARG2 &&
3486                   settings->op[stage].aop == WINED3DTOP_SELECTARG1) {
3487             op_equal = settings->op[stage].carg2 == settings->op[stage].aarg1;
3488         } else if(settings->op[stage].cop == WINED3DTOP_SELECTARG2 &&
3489                   settings->op[stage].aop == WINED3DTOP_SELECTARG2) {
3490             op_equal = settings->op[stage].carg2 == settings->op[stage].aarg2;
3491         } else {
3492             op_equal = settings->op[stage].aop   == settings->op[stage].cop &&
3493                        settings->op[stage].carg0 == settings->op[stage].aarg0 &&
3494                        settings->op[stage].carg1 == settings->op[stage].aarg1 &&
3495                        settings->op[stage].carg2 == settings->op[stage].aarg2;
3496         }
3497
3498         if(settings->op[stage].aop == WINED3DTOP_DISABLE) {
3499             gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst,
3500                           settings->op[stage].cop, settings->op[stage].carg0,
3501                           settings->op[stage].carg1, settings->op[stage].carg2);
3502             if(stage == 0) {
3503                 shader_addline(&buffer, "MOV ret.w, fragment.color.primary.w;\n");
3504             }
3505         } else if(op_equal) {
3506             gen_ffp_instr(&buffer, stage, TRUE, TRUE, settings->op[stage].dst,
3507                           settings->op[stage].cop, settings->op[stage].carg0,
3508                           settings->op[stage].carg1, settings->op[stage].carg2);
3509         } else {
3510             gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst,
3511                           settings->op[stage].cop, settings->op[stage].carg0,
3512                           settings->op[stage].carg1, settings->op[stage].carg2);
3513             gen_ffp_instr(&buffer, stage, FALSE, TRUE, settings->op[stage].dst,
3514                           settings->op[stage].aop, settings->op[stage].aarg0,
3515                           settings->op[stage].aarg1, settings->op[stage].aarg2);
3516         }
3517     }
3518
3519     if(settings->sRGB_write) {
3520         shader_addline(&buffer, "MAD ret, fragment.color.secondary, specular_enable, %s;\n", final_combiner_src);
3521         arbfp_add_sRGB_correction(&buffer, "ret", "arg0", "arg1", "arg2");
3522         shader_addline(&buffer, "MOV result.color.w, ret.w;\n");
3523     } else {
3524         shader_addline(&buffer, "MAD result.color, fragment.color.secondary, specular_enable, %s;\n", final_combiner_src);
3525     }
3526
3527     /* Footer */
3528     shader_addline(&buffer, "END\n");
3529
3530     /* Generate the shader */
3531     GL_EXTCALL(glGenProgramsARB(1, &ret));
3532     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ret));
3533     GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(buffer.buffer), buffer.buffer));
3534
3535     if (glGetError() == GL_INVALID_OPERATION) {
3536         GLint pos;
3537         glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
3538         FIXME("Fragment program error at position %d: %s\n", pos,
3539               debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
3540     }
3541     shader_buffer_free(&buffer);
3542     return ret;
3543 }
3544
3545 static void fragment_prog_arbfp(DWORD state, IWineD3DStateBlockImpl *stateblock, WineD3DContext *context) {
3546     IWineD3DDeviceImpl *device = stateblock->wineD3DDevice;
3547     struct shader_arb_priv *priv = device->fragment_priv;
3548     BOOL use_pshader = use_ps(stateblock);
3549     BOOL use_vshader = use_vs(stateblock);
3550     struct ffp_frag_settings settings;
3551     const struct arbfp_ffp_desc *desc;
3552     unsigned int i;
3553
3554     TRACE("state %#x, stateblock %p, context %p\n", state, stateblock, context);
3555
3556     if(isStateDirty(context, STATE_RENDER(WINED3DRS_FOGENABLE))) {
3557         if(!use_pshader && device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) {
3558             /* Reload fixed function constants since they collide with the pixel shader constants */
3559             for(i = 0; i < MAX_TEXTURES; i++) {
3560                 set_bumpmat_arbfp(STATE_TEXTURESTAGE(i, WINED3DTSS_BUMPENVMAT00), stateblock, context);
3561             }
3562             state_texfactor_arbfp(STATE_RENDER(WINED3DRS_TEXTUREFACTOR), stateblock, context);
3563             state_arb_specularenable(STATE_RENDER(WINED3DRS_SPECULARENABLE), stateblock, context);
3564         } else if(use_pshader && !isStateDirty(context, device->StateTable[STATE_VSHADER].representative)) {
3565             device->shader_backend->shader_select((IWineD3DDevice *)stateblock->wineD3DDevice, use_pshader, use_vshader);
3566         }
3567         return;
3568     }
3569
3570     if(!use_pshader) {
3571         /* Find or create a shader implementing the fixed function pipeline settings, then activate it */
3572         gen_ffp_frag_op(stateblock, &settings, FALSE);
3573         desc = (const struct arbfp_ffp_desc *)find_ffp_frag_shader(priv->fragment_shaders, &settings);
3574         if(!desc) {
3575             struct arbfp_ffp_desc *new_desc = HeapAlloc(GetProcessHeap(), 0, sizeof(*new_desc));
3576             if (!new_desc)
3577             {
3578                 ERR("Out of memory\n");
3579                 return;
3580             }
3581             new_desc->num_textures_used = 0;
3582             for(i = 0; i < GL_LIMITS(texture_stages); i++) {
3583                 if(settings.op[i].cop == WINED3DTOP_DISABLE) break;
3584                 new_desc->num_textures_used = i;
3585             }
3586
3587             memcpy(&new_desc->parent.settings, &settings, sizeof(settings));
3588             new_desc->shader = gen_arbfp_ffp_shader(&settings, stateblock);
3589             add_ffp_frag_shader(priv->fragment_shaders, &new_desc->parent);
3590             TRACE("Allocated fixed function replacement shader descriptor %p\n", new_desc);
3591             desc = new_desc;
3592         }
3593
3594         /* Now activate the replacement program. GL_FRAGMENT_PROGRAM_ARB is already active(however, note the
3595          * comment above the shader_select call below). If e.g. GLSL is active, the shader_select call will
3596          * deactivate it.
3597          */
3598         GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader));
3599         checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)");
3600         priv->current_fprogram_id = desc->shader;
3601
3602         if(device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) {
3603             /* Reload fixed function constants since they collide with the pixel shader constants */
3604             for(i = 0; i < MAX_TEXTURES; i++) {
3605                 set_bumpmat_arbfp(STATE_TEXTURESTAGE(i, WINED3DTSS_BUMPENVMAT00), stateblock, context);
3606             }
3607             state_texfactor_arbfp(STATE_RENDER(WINED3DRS_TEXTUREFACTOR), stateblock, context);
3608             state_arb_specularenable(STATE_RENDER(WINED3DRS_SPECULARENABLE), stateblock, context);
3609         }
3610         context->last_was_pshader = FALSE;
3611     } else {
3612         context->last_was_pshader = TRUE;
3613     }
3614
3615     /* Finally, select the shader. If a pixel shader is used, it will be set and enabled by the shader backend.
3616      * If this shader backend is arbfp(most likely), then it will simply overwrite the last fixed function replace-
3617      * ment shader. If the shader backend is not ARB, it currently is important that the opengl implementation
3618      * type overwrites GL_ARB_fragment_program. This is currently the case with GLSL. If we really want to use
3619      * atifs or nvrc pixel shaders with arb fragment programs we'd have to disable GL_FRAGMENT_PROGRAM_ARB here
3620      *
3621      * Don't call shader_select if the vertex shader is dirty, because it will be called later on by the vertex
3622      * shader handler
3623      */
3624     if(!isStateDirty(context, device->StateTable[STATE_VSHADER].representative)) {
3625         device->shader_backend->shader_select((IWineD3DDevice *)stateblock->wineD3DDevice, use_pshader, use_vshader);
3626
3627         if (!isStateDirty(context, STATE_VERTEXSHADERCONSTANT) && (use_vshader || use_pshader)) {
3628             device->StateTable[STATE_VERTEXSHADERCONSTANT].apply(STATE_VERTEXSHADERCONSTANT, stateblock, context);
3629         }
3630     }
3631     if(use_pshader) {
3632         device->StateTable[STATE_PIXELSHADERCONSTANT].apply(STATE_PIXELSHADERCONSTANT, stateblock, context);
3633     }
3634 }
3635
3636 /* We can't link the fog states to the fragment state directly since the vertex pipeline links them
3637  * to FOGENABLE. A different linking in different pipeline parts can't be expressed in the combined
3638  * state table, so we need to handle that with a forwarding function. The other invisible side effect
3639  * is that changing the fog start and fog end(which links to FOGENABLE in vertex) results in the
3640  * fragment_prog_arbfp function being called because FOGENABLE is dirty, which calls this function here
3641  */
3642 static void state_arbfp_fog(DWORD state, IWineD3DStateBlockImpl *stateblock, WineD3DContext *context) {
3643     enum fogsource new_source;
3644
3645     TRACE("state %#x, stateblock %p, context %p\n", state, stateblock, context);
3646
3647     if(!isStateDirty(context, STATE_PIXELSHADER)) {
3648         fragment_prog_arbfp(state, stateblock, context);
3649     }
3650
3651     if(!stateblock->renderState[WINED3DRS_FOGENABLE]) return;
3652
3653     if(stateblock->renderState[WINED3DRS_FOGTABLEMODE] == WINED3DFOG_NONE) {
3654         if(use_vs(stateblock)) {
3655             new_source = FOGSOURCE_VS;
3656         } else {
3657             if(stateblock->renderState[WINED3DRS_FOGVERTEXMODE] == WINED3DFOG_NONE || context->last_was_rhw) {
3658                 new_source = FOGSOURCE_COORD;
3659             } else {
3660                 new_source = FOGSOURCE_FFP;
3661             }
3662         }
3663     } else {
3664         new_source = FOGSOURCE_FFP;
3665     }
3666     if(new_source != context->fog_source) {
3667         context->fog_source = new_source;
3668         state_fogstartend(STATE_RENDER(WINED3DRS_FOGSTART), stateblock, context);
3669     }
3670 }
3671
3672 static void textransform(DWORD state, IWineD3DStateBlockImpl *stateblock, WineD3DContext *context) {
3673     if(!isStateDirty(context, STATE_PIXELSHADER)) {
3674         fragment_prog_arbfp(state, stateblock, context);
3675     }
3676 }
3677
3678 #undef GLINFO_LOCATION
3679
3680 static const struct StateEntryTemplate arbfp_fragmentstate_template[] = {
3681     {STATE_RENDER(WINED3DRS_TEXTUREFACTOR),               { STATE_RENDER(WINED3DRS_TEXTUREFACTOR),              state_texfactor_arbfp   }, 0                               },
3682     {STATE_TEXTURESTAGE(0, WINED3DTSS_COLOROP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3683     {STATE_TEXTURESTAGE(0, WINED3DTSS_COLORARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3684     {STATE_TEXTURESTAGE(0, WINED3DTSS_COLORARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3685     {STATE_TEXTURESTAGE(0, WINED3DTSS_COLORARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3686     {STATE_TEXTURESTAGE(0, WINED3DTSS_ALPHAOP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3687     {STATE_TEXTURESTAGE(0, WINED3DTSS_ALPHAARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3688     {STATE_TEXTURESTAGE(0, WINED3DTSS_ALPHAARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3689     {STATE_TEXTURESTAGE(0, WINED3DTSS_ALPHAARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3690     {STATE_TEXTURESTAGE(0, WINED3DTSS_RESULTARG),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3691     {STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00),      { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3692     {STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT01),      { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3693     {STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT10),      { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3694     {STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT11),      { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3695     {STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVLSCALE),     { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3696     {STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVLOFFSET),    { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3697     {STATE_TEXTURESTAGE(1, WINED3DTSS_COLOROP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3698     {STATE_TEXTURESTAGE(1, WINED3DTSS_COLORARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3699     {STATE_TEXTURESTAGE(1, WINED3DTSS_COLORARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3700     {STATE_TEXTURESTAGE(1, WINED3DTSS_COLORARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3701     {STATE_TEXTURESTAGE(1, WINED3DTSS_ALPHAOP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3702     {STATE_TEXTURESTAGE(1, WINED3DTSS_ALPHAARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3703     {STATE_TEXTURESTAGE(1, WINED3DTSS_ALPHAARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3704     {STATE_TEXTURESTAGE(1, WINED3DTSS_ALPHAARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3705     {STATE_TEXTURESTAGE(1, WINED3DTSS_RESULTARG),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3706     {STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00),      { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3707     {STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT01),      { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3708     {STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT10),      { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3709     {STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT11),      { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3710     {STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVLSCALE),     { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3711     {STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVLOFFSET),    { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3712     {STATE_TEXTURESTAGE(2, WINED3DTSS_COLOROP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3713     {STATE_TEXTURESTAGE(2, WINED3DTSS_COLORARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3714     {STATE_TEXTURESTAGE(2, WINED3DTSS_COLORARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3715     {STATE_TEXTURESTAGE(2, WINED3DTSS_COLORARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3716     {STATE_TEXTURESTAGE(2, WINED3DTSS_ALPHAOP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3717     {STATE_TEXTURESTAGE(2, WINED3DTSS_ALPHAARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3718     {STATE_TEXTURESTAGE(2, WINED3DTSS_ALPHAARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3719     {STATE_TEXTURESTAGE(2, WINED3DTSS_ALPHAARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3720     {STATE_TEXTURESTAGE(2, WINED3DTSS_RESULTARG),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3721     {STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00),      { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3722     {STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT01),      { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3723     {STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT10),      { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3724     {STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT11),      { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3725     {STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVLSCALE),     { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3726     {STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVLOFFSET),    { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3727     {STATE_TEXTURESTAGE(3, WINED3DTSS_COLOROP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3728     {STATE_TEXTURESTAGE(3, WINED3DTSS_COLORARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3729     {STATE_TEXTURESTAGE(3, WINED3DTSS_COLORARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3730     {STATE_TEXTURESTAGE(3, WINED3DTSS_COLORARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3731     {STATE_TEXTURESTAGE(3, WINED3DTSS_ALPHAOP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3732     {STATE_TEXTURESTAGE(3, WINED3DTSS_ALPHAARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3733     {STATE_TEXTURESTAGE(3, WINED3DTSS_ALPHAARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3734     {STATE_TEXTURESTAGE(3, WINED3DTSS_ALPHAARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3735     {STATE_TEXTURESTAGE(3, WINED3DTSS_RESULTARG),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3736     {STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00),      { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3737     {STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT01),      { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3738     {STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT10),      { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3739     {STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT11),      { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3740     {STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVLSCALE),     { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3741     {STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVLOFFSET),    { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3742     {STATE_TEXTURESTAGE(4, WINED3DTSS_COLOROP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3743     {STATE_TEXTURESTAGE(4, WINED3DTSS_COLORARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3744     {STATE_TEXTURESTAGE(4, WINED3DTSS_COLORARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3745     {STATE_TEXTURESTAGE(4, WINED3DTSS_COLORARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3746     {STATE_TEXTURESTAGE(4, WINED3DTSS_ALPHAOP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3747     {STATE_TEXTURESTAGE(4, WINED3DTSS_ALPHAARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3748     {STATE_TEXTURESTAGE(4, WINED3DTSS_ALPHAARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3749     {STATE_TEXTURESTAGE(4, WINED3DTSS_ALPHAARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3750     {STATE_TEXTURESTAGE(4, WINED3DTSS_RESULTARG),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3751     {STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00),      { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3752     {STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT01),      { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3753     {STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT10),      { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3754     {STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT11),      { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3755     {STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVLSCALE),     { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3756     {STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVLOFFSET),    { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3757     {STATE_TEXTURESTAGE(5, WINED3DTSS_COLOROP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3758     {STATE_TEXTURESTAGE(5, WINED3DTSS_COLORARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3759     {STATE_TEXTURESTAGE(5, WINED3DTSS_COLORARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3760     {STATE_TEXTURESTAGE(5, WINED3DTSS_COLORARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3761     {STATE_TEXTURESTAGE(5, WINED3DTSS_ALPHAOP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3762     {STATE_TEXTURESTAGE(5, WINED3DTSS_ALPHAARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3763     {STATE_TEXTURESTAGE(5, WINED3DTSS_ALPHAARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3764     {STATE_TEXTURESTAGE(5, WINED3DTSS_ALPHAARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3765     {STATE_TEXTURESTAGE(5, WINED3DTSS_RESULTARG),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3766     {STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00),      { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3767     {STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT01),      { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3768     {STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT10),      { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3769     {STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT11),      { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3770     {STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVLSCALE),     { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3771     {STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVLOFFSET),    { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3772     {STATE_TEXTURESTAGE(6, WINED3DTSS_COLOROP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3773     {STATE_TEXTURESTAGE(6, WINED3DTSS_COLORARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3774     {STATE_TEXTURESTAGE(6, WINED3DTSS_COLORARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3775     {STATE_TEXTURESTAGE(6, WINED3DTSS_COLORARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3776     {STATE_TEXTURESTAGE(6, WINED3DTSS_ALPHAOP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3777     {STATE_TEXTURESTAGE(6, WINED3DTSS_ALPHAARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3778     {STATE_TEXTURESTAGE(6, WINED3DTSS_ALPHAARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3779     {STATE_TEXTURESTAGE(6, WINED3DTSS_ALPHAARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3780     {STATE_TEXTURESTAGE(6, WINED3DTSS_RESULTARG),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3781     {STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00),      { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3782     {STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT01),      { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3783     {STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT10),      { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3784     {STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT11),      { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3785     {STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVLSCALE),     { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3786     {STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVLOFFSET),    { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3787     {STATE_TEXTURESTAGE(7, WINED3DTSS_COLOROP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3788     {STATE_TEXTURESTAGE(7, WINED3DTSS_COLORARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3789     {STATE_TEXTURESTAGE(7, WINED3DTSS_COLORARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3790     {STATE_TEXTURESTAGE(7, WINED3DTSS_COLORARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3791     {STATE_TEXTURESTAGE(7, WINED3DTSS_ALPHAOP),           { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3792     {STATE_TEXTURESTAGE(7, WINED3DTSS_ALPHAARG1),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3793     {STATE_TEXTURESTAGE(7, WINED3DTSS_ALPHAARG2),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3794     {STATE_TEXTURESTAGE(7, WINED3DTSS_ALPHAARG0),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3795     {STATE_TEXTURESTAGE(7, WINED3DTSS_RESULTARG),         { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3796     {STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00),      { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3797     {STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT01),      { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3798     {STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT10),      { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3799     {STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT11),      { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00),     set_bumpmat_arbfp       }, 0                               },
3800     {STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVLSCALE),     { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3801     {STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVLOFFSET),    { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVLSCALE),    tex_bumpenvlum_arbfp    }, 0                               },
3802     {STATE_SAMPLER(0),                                    { STATE_SAMPLER(0),                                   sampler_texdim          }, 0                               },
3803     {STATE_SAMPLER(1),                                    { STATE_SAMPLER(1),                                   sampler_texdim          }, 0                               },
3804     {STATE_SAMPLER(2),                                    { STATE_SAMPLER(2),                                   sampler_texdim          }, 0                               },
3805     {STATE_SAMPLER(3),                                    { STATE_SAMPLER(3),                                   sampler_texdim          }, 0                               },
3806     {STATE_SAMPLER(4),                                    { STATE_SAMPLER(4),                                   sampler_texdim          }, 0                               },
3807     {STATE_SAMPLER(5),                                    { STATE_SAMPLER(5),                                   sampler_texdim          }, 0                               },
3808     {STATE_SAMPLER(6),                                    { STATE_SAMPLER(6),                                   sampler_texdim          }, 0                               },
3809     {STATE_SAMPLER(7),                                    { STATE_SAMPLER(7),                                   sampler_texdim          }, 0                               },
3810     {STATE_PIXELSHADER,                                   { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3811     {STATE_RENDER(WINED3DRS_FOGENABLE),                   { STATE_RENDER(WINED3DRS_FOGENABLE),                  state_arbfp_fog         }, 0                               },
3812     {STATE_RENDER(WINED3DRS_FOGTABLEMODE),                { STATE_RENDER(WINED3DRS_FOGENABLE),                  state_arbfp_fog         }, 0                               },
3813     {STATE_RENDER(WINED3DRS_FOGVERTEXMODE),               { STATE_RENDER(WINED3DRS_FOGENABLE),                  state_arbfp_fog         }, 0                               },
3814     {STATE_RENDER(WINED3DRS_FOGSTART),                    { STATE_RENDER(WINED3DRS_FOGSTART),                   state_fogstartend       }, 0                               },
3815     {STATE_RENDER(WINED3DRS_FOGEND),                      { STATE_RENDER(WINED3DRS_FOGSTART),                   state_fogstartend       }, 0                               },
3816     {STATE_RENDER(WINED3DRS_SRGBWRITEENABLE),             { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, 0                               },
3817     {STATE_RENDER(WINED3DRS_FOGCOLOR),                    { STATE_RENDER(WINED3DRS_FOGCOLOR),                   state_fogcolor          }, 0                               },
3818     {STATE_RENDER(WINED3DRS_FOGDENSITY),                  { STATE_RENDER(WINED3DRS_FOGDENSITY),                 state_fogdensity        }, 0                               },
3819     {STATE_TEXTURESTAGE(0,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(0, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform      }, 0                               },
3820     {STATE_TEXTURESTAGE(1,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(1, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform      }, 0                               },
3821     {STATE_TEXTURESTAGE(2,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(2, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform      }, 0                               },
3822     {STATE_TEXTURESTAGE(3,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(3, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform      }, 0                               },
3823     {STATE_TEXTURESTAGE(4,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(4, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform      }, 0                               },
3824     {STATE_TEXTURESTAGE(5,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(5, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform      }, 0                               },
3825     {STATE_TEXTURESTAGE(6,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(6, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform      }, 0                               },
3826     {STATE_TEXTURESTAGE(7,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(7, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform      }, 0                               },
3827     {STATE_RENDER(WINED3DRS_SPECULARENABLE),              { STATE_RENDER(WINED3DRS_SPECULARENABLE),             state_arb_specularenable}, 0                               },
3828     {0 /* Terminate */,                                   { 0,                                                  0                       }, 0                               },
3829 };
3830
3831 const struct fragment_pipeline arbfp_fragment_pipeline = {
3832     arbfp_enable,
3833     arbfp_get_caps,
3834     arbfp_alloc,
3835     arbfp_free,
3836     shader_arb_color_fixup_supported,
3837     arbfp_fragmentstate_template,
3838     TRUE /* We can disable projected textures */
3839 };
3840
3841 #define GLINFO_LOCATION device->adapter->gl_info
3842
3843 struct arbfp_blit_priv {
3844     GLenum yuy2_rect_shader, yuy2_2d_shader;
3845     GLenum uyvy_rect_shader, uyvy_2d_shader;
3846     GLenum yv12_rect_shader, yv12_2d_shader;
3847 };
3848
3849 static HRESULT arbfp_blit_alloc(IWineD3DDevice *iface) {
3850     IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) iface;
3851     device->blit_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct arbfp_blit_priv));
3852     if(!device->blit_priv) {
3853         ERR("Out of memory\n");
3854         return E_OUTOFMEMORY;
3855     }
3856     return WINED3D_OK;
3857 }
3858 static void arbfp_blit_free(IWineD3DDevice *iface) {
3859     IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) iface;
3860     struct arbfp_blit_priv *priv = device->blit_priv;
3861
3862     ENTER_GL();
3863     GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_rect_shader));
3864     GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_2d_shader));
3865     GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_rect_shader));
3866     GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_2d_shader));
3867     GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_rect_shader));
3868     GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_2d_shader));
3869     checkGLcall("Delete yuv programs\n");
3870     LEAVE_GL();
3871 }
3872
3873 static BOOL gen_planar_yuv_read(SHADER_BUFFER *buffer, enum yuv_fixup yuv_fixup, GLenum textype, char *luminance)
3874 {
3875     char chroma;
3876     const char *tex, *texinstr;
3877
3878     if (yuv_fixup == YUV_FIXUP_UYVY) {
3879         chroma = 'x';
3880         *luminance = 'w';
3881     } else {
3882         chroma = 'w';
3883         *luminance = 'x';
3884     }
3885     switch(textype) {
3886         case GL_TEXTURE_2D:             tex = "2D";     texinstr = "TXP"; break;
3887         case GL_TEXTURE_RECTANGLE_ARB:  tex = "RECT";   texinstr = "TEX"; break;
3888         default:
3889             /* This is more tricky than just replacing the texture type - we have to navigate
3890              * properly in the texture to find the correct chroma values
3891              */
3892             FIXME("Implement yuv correction for non-2d, non-rect textures\n");
3893             return FALSE;
3894     }
3895
3896     /* First we have to read the chroma values. This means we need at least two pixels(no filtering),
3897      * or 4 pixels(with filtering). To get the unmodified chromas, we have to rid ourselves of the
3898      * filtering when we sample the texture.
3899      *
3900      * These are the rules for reading the chroma:
3901      *
3902      * Even pixel: Cr
3903      * Even pixel: U
3904      * Odd pixel: V
3905      *
3906      * So we have to get the sampling x position in non-normalized coordinates in integers
3907      */
3908     if(textype != GL_TEXTURE_RECTANGLE_ARB) {
3909         shader_addline(buffer, "MUL texcrd.xy, fragment.texcoord[0], size.x;\n");
3910         shader_addline(buffer, "MOV texcrd.w, size.x;\n");
3911     } else {
3912         shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
3913     }
3914     /* We must not allow filtering between pixel x and x+1, this would mix U and V
3915      * Vertical filtering is ok. However, bear in mind that the pixel center is at
3916      * 0.5, so add 0.5.
3917      */
3918     shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
3919     shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.y;\n");
3920
3921     /* Divide the x coordinate by 0.5 and get the fraction. This gives 0.25 and 0.75 for the
3922      * even and odd pixels respectively
3923      */
3924     shader_addline(buffer, "MUL texcrd2, texcrd, coef.y;\n");
3925     shader_addline(buffer, "FRC texcrd2, texcrd2;\n");
3926
3927     /* Sample Pixel 1 */
3928     shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex);
3929
3930     /* Put the value into either of the chroma values */
3931     shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n");
3932     shader_addline(buffer, "MUL chroma.x, luminance.%c, temp.x;\n", chroma);
3933     shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n");
3934     shader_addline(buffer, "MUL chroma.y, luminance.%c, temp.x;\n", chroma);
3935
3936     /* Sample pixel 2. If we read an even pixel(SLT above returned 1), sample
3937      * the pixel right to the current one. Otherwise, sample the left pixel.
3938      * Bias and scale the SLT result to -1;1 and add it to the texcrd.x.
3939      */
3940     shader_addline(buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n");
3941     shader_addline(buffer, "ADD texcrd.x, texcrd, temp.x;\n");
3942     shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex);
3943
3944     /* Put the value into the other chroma */
3945     shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n");
3946     shader_addline(buffer, "MAD chroma.y, luminance.%c, temp.x, chroma.y;\n", chroma);
3947     shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n");
3948     shader_addline(buffer, "MAD chroma.x, luminance.%c, temp.x, chroma.x;\n", chroma);
3949
3950     /* TODO: If filtering is enabled, sample a 2nd pair of pixels left or right of
3951      * the current one and lerp the two U and V values
3952      */
3953
3954     /* This gives the correctly filtered luminance value */
3955     shader_addline(buffer, "TEX luminance, fragment.texcoord[0], texture[0], %s;\n", tex);
3956
3957     return TRUE;
3958 }
3959
3960 static BOOL gen_yv12_read(SHADER_BUFFER *buffer, GLenum textype, char *luminance)
3961 {
3962     const char *tex;
3963
3964     switch(textype) {
3965         case GL_TEXTURE_2D:             tex = "2D";     break;
3966         case GL_TEXTURE_RECTANGLE_ARB:  tex = "RECT";   break;
3967         default:
3968             FIXME("Implement yv12 correction for non-2d, non-rect textures\n");
3969             return FALSE;
3970     }
3971
3972     /* YV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2)
3973      * V and a (W/2)x(H/2) U plane, each with 8 bit per pixel. So the effective
3974      * bitdepth is 12 bits per pixel. Since the U and V planes have only half the
3975      * pitch of the luminance plane, the packing into the gl texture is a bit
3976      * unfortunate. If the whole texture is interpreted as luminance data it looks
3977      * approximately like this:
3978      *
3979      *        +----------------------------------+----
3980      *        |                                  |
3981      *        |                                  |
3982      *        |                                  |
3983      *        |                                  |
3984      *        |                                  |   2
3985      *        |            LUMINANCE             |   -
3986      *        |                                  |   3
3987      *        |                                  |
3988      *        |                                  |
3989      *        |                                  |
3990      *        |                                  |
3991      *        +----------------+-----------------+----
3992      *        |                |                 |
3993      *        |  U even rows   |  U odd rows     |
3994      *        |                |                 |   1
3995      *        +----------------+------------------   -
3996      *        |                |                 |   3
3997      *        |  V even rows   |  V odd rows     |
3998      *        |                |                 |
3999      *        +----------------+-----------------+----
4000      *        |                |                 |
4001      *        |     0.5        |       0.5       |
4002      *
4003      * So it appears as if there are 4 chroma images, but in fact the odd rows
4004      * in the chroma images are in the same row as the even ones. So its is
4005      * kinda tricky to read
4006      *
4007      * When reading from rectangle textures, keep in mind that the input y coordinates
4008      * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height
4009      */
4010     shader_addline(buffer, "PARAM yv12_coef = {%f, %f, %f, %f};\n",
4011                    2.0 / 3.0, 1.0 / 6.0, (2.0 / 3.0) + (1.0 / 6.0), 1.0 / 3.0);
4012
4013     shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
4014     /* the chroma planes have only half the width */
4015     shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n");
4016
4017     /* The first value is between 2/3 and 5/6th of the texture's height, so scale+bias
4018      * the coordinate. Also read the right side of the image when reading odd lines
4019      *
4020      * Don't forget to clamp the y values in into the range, otherwise we'll get filtering
4021      * bleeding
4022      */
4023     if(textype == GL_TEXTURE_2D) {
4024
4025         shader_addline(buffer, "RCP chroma.w, size.y;\n");
4026
4027         shader_addline(buffer, "MUL texcrd2.y, texcrd.y, size.y;\n");
4028
4029         shader_addline(buffer, "FLR texcrd2.y, texcrd2.y;\n");
4030         shader_addline(buffer, "MAD texcrd.y, texcrd.y, yv12_coef.y, yv12_coef.x;\n");
4031
4032         /* Read odd lines from the right side(add size * 0.5 to the x coordinate */
4033         shader_addline(buffer, "ADD texcrd2.x, texcrd2.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */
4034         shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n");
4035         shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n");
4036         shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n");
4037
4038         /* clamp, keep the half pixel origin in mind */
4039         shader_addline(buffer, "MAD temp.y, coef.y, chroma.w, yv12_coef.x;\n");
4040         shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
4041         shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.z;\n");
4042         shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
4043     } else {
4044         /* Read from [size - size+size/4] */
4045         shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n");
4046         shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.w, size.y;\n");
4047
4048         /* Read odd lines from the right side(add size * 0.5 to the x coordinate */
4049         shader_addline(buffer, "ADD texcrd2.x, texcrd.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */
4050         shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n");
4051         shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n");
4052         shader_addline(buffer, "MUL texcrd2.x, texcrd2.x, size.x;\n");
4053         shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n");
4054
4055         /* Make sure to read exactly from the pixel center */
4056         shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n");
4057         shader_addline(buffer, "ADD texcrd.y, texcrd.y, coef.y;\n");
4058
4059         /* Clamp */
4060         shader_addline(buffer, "MAD temp.y, size.y, coef.w, size.y;\n");
4061         shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n");
4062         shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
4063         shader_addline(buffer, "ADD temp.y, size.y, -coef.y;\n");
4064         shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
4065     }
4066     /* Read the texture, put the result into the output register */
4067     shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
4068     shader_addline(buffer, "MOV chroma.x, temp.w;\n");
4069
4070     /* The other chroma value is 1/6th of the texture lower, from 5/6th to 6/6th
4071      * No need to clamp because we're just reusing the already clamped value from above
4072      */
4073     if(textype == GL_TEXTURE_2D) {
4074         shader_addline(buffer, "ADD texcrd.y, texcrd.y, yv12_coef.y;\n");
4075     } else {
4076         shader_addline(buffer, "MAD texcrd.y, size.y, coef.w, texcrd.y;\n");
4077     }
4078     shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
4079     shader_addline(buffer, "MOV chroma.y, temp.w;\n");
4080
4081     /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate.
4082      * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance
4083      * values due to filtering
4084      */
4085     shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
4086     if(textype == GL_TEXTURE_2D) {
4087         /* Multiply the y coordinate by 2/3 and clamp it */
4088         shader_addline(buffer, "MUL texcrd.y, texcrd.y, yv12_coef.x;\n");
4089         shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.x;\n");
4090         shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
4091         shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
4092     } else {
4093         /* Reading from texture_rectangles is pretty straightforward, just use the unmodified
4094          * texture coordinate. It is still a good idea to clamp it though, since the opengl texture
4095          * is bigger
4096          */
4097         shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n");
4098         shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n");
4099         shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
4100     }
4101     *luminance = 'a';
4102
4103     return TRUE;
4104 }
4105
4106 static GLuint gen_yuv_shader(IWineD3DDeviceImpl *device, enum yuv_fixup yuv_fixup, GLenum textype)
4107 {
4108     GLenum shader;
4109     SHADER_BUFFER buffer;
4110     char luminance_component;
4111     struct arbfp_blit_priv *priv = device->blit_priv;
4112
4113     /* Shader header */
4114     shader_buffer_init(&buffer);
4115
4116     ENTER_GL();
4117     GL_EXTCALL(glGenProgramsARB(1, &shader));
4118     checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))");
4119     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
4120     checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
4121     LEAVE_GL();
4122     if(!shader) {
4123         shader_buffer_free(&buffer);
4124         return 0;
4125     }
4126
4127     /* The YUY2 and UYVY formats contain two pixels packed into a 32 bit macropixel,
4128      * giving effectively 16 bit per pixel. The color consists of a luminance(Y) and
4129      * two chroma(U and V) values. Each macropixel has two luminance values, one for
4130      * each single pixel it contains, and one U and one V value shared between both
4131      * pixels.
4132      *
4133      * The data is loaded into an A8L8 texture. With YUY2, the luminance component
4134      * contains the luminance and alpha the chroma. With UYVY it is vice versa. Thus
4135      * take the format into account when generating the read swizzles
4136      *
4137      * Reading the Y value is straightforward - just sample the texture. The hardware
4138      * takes care of filtering in the horizontal and vertical direction.
4139      *
4140      * Reading the U and V values is harder. We have to avoid filtering horizontally,
4141      * because that would mix the U and V values of one pixel or two adjacent pixels.
4142      * Thus floor the texture coordinate and add 0.5 to get an unfiltered read,
4143      * regardless of the filtering setting. Vertical filtering works automatically
4144      * though - the U and V values of two rows are mixed nicely.
4145      *
4146      * Appart of avoiding filtering issues, the code has to know which value it just
4147      * read, and where it can find the other one. To determine this, it checks if
4148      * it sampled an even or odd pixel, and shifts the 2nd read accordingly.
4149      *
4150      * Handling horizontal filtering of U and V values requires reading a 2nd pair
4151      * of pixels, extracting U and V and mixing them. This is not implemented yet.
4152      *
4153      * An alternative implementation idea is to load the texture as A8R8G8B8 texture,
4154      * with width / 2. This way one read gives all 3 values, finding U and V is easy
4155      * in an unfiltered situation. Finding the luminance on the other hand requires
4156      * finding out if it is an odd or even pixel. The real drawback of this approach
4157      * is filtering. This would have to be emulated completely in the shader, reading
4158      * up two 2 packed pixels in up to 2 rows and interpolating both horizontally and
4159      * vertically. Beyond that it would require adjustments to the texture handling
4160      * code to deal with the width scaling
4161      */
4162     shader_addline(&buffer, "!!ARBfp1.0\n");
4163     shader_addline(&buffer, "TEMP luminance;\n");
4164     shader_addline(&buffer, "TEMP temp;\n");
4165     shader_addline(&buffer, "TEMP chroma;\n");
4166     shader_addline(&buffer, "TEMP texcrd;\n");
4167     shader_addline(&buffer, "TEMP texcrd2;\n");
4168     shader_addline(&buffer, "PARAM coef = {1.0, 0.5, 2.0, 0.25};\n");
4169     shader_addline(&buffer, "PARAM yuv_coef = {1.403, 0.344, 0.714, 1.770};\n");
4170     shader_addline(&buffer, "PARAM size = program.local[0];\n");
4171
4172     switch (yuv_fixup)
4173     {
4174         case YUV_FIXUP_UYVY:
4175         case YUV_FIXUP_YUY2:
4176             if (!gen_planar_yuv_read(&buffer, yuv_fixup, textype, &luminance_component))
4177             {
4178                 shader_buffer_free(&buffer);
4179                 return 0;
4180             }
4181             break;
4182
4183         case YUV_FIXUP_YV12:
4184             if (!gen_yv12_read(&buffer, textype, &luminance_component))
4185             {
4186                 shader_buffer_free(&buffer);
4187                 return 0;
4188             }
4189             break;
4190
4191         default:
4192             FIXME("Unsupported YUV fixup %#x\n", yuv_fixup);
4193             shader_buffer_free(&buffer);
4194             return 0;
4195     }
4196
4197     /* Calculate the final result. Formula is taken from
4198      * http://www.fourcc.org/fccyvrgb.php. Note that the chroma
4199      * ranges from -0.5 to 0.5
4200      */
4201     shader_addline(&buffer, "SUB chroma.xy, chroma, coef.y;\n");
4202
4203     shader_addline(&buffer, "MAD result.color.x, chroma.x, yuv_coef.x, luminance.%c;\n", luminance_component);
4204     shader_addline(&buffer, "MAD temp.x, -chroma.y, yuv_coef.y, luminance.%c;\n", luminance_component);
4205     shader_addline(&buffer, "MAD result.color.y, -chroma.x, yuv_coef.z, temp.x;\n");
4206     shader_addline(&buffer, "MAD result.color.z, chroma.y, yuv_coef.w, luminance.%c;\n", luminance_component);
4207     shader_addline(&buffer, "END\n");
4208
4209     ENTER_GL();
4210     GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(buffer.buffer), buffer.buffer));
4211
4212     if (glGetError() == GL_INVALID_OPERATION) {
4213         GLint pos;
4214         glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
4215         FIXME("Fragment program error at position %d: %s\n", pos,
4216               debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
4217     }
4218     shader_buffer_free(&buffer);
4219     LEAVE_GL();
4220
4221     switch (yuv_fixup)
4222     {
4223         case YUV_FIXUP_YUY2:
4224             if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->yuy2_rect_shader = shader;
4225             else priv->yuy2_2d_shader = shader;
4226             break;
4227
4228         case YUV_FIXUP_UYVY:
4229             if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->uyvy_rect_shader = shader;
4230             else priv->uyvy_2d_shader = shader;
4231             break;
4232
4233         case YUV_FIXUP_YV12:
4234             if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->yv12_rect_shader = shader;
4235             else priv->yv12_2d_shader = shader;
4236             break;
4237     }
4238
4239     return shader;
4240 }
4241
4242 static HRESULT arbfp_blit_set(IWineD3DDevice *iface, const struct GlPixelFormatDesc *format_desc,
4243         GLenum textype, UINT width, UINT height)
4244 {
4245     GLenum shader;
4246     IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) iface;
4247     float size[4] = {width, height, 1, 1};
4248     struct arbfp_blit_priv *priv = device->blit_priv;
4249     enum yuv_fixup yuv_fixup;
4250
4251     if (!is_yuv_fixup(format_desc->color_fixup))
4252     {
4253         TRACE("Fixup:\n");
4254         dump_color_fixup_desc(format_desc->color_fixup);
4255         /* Don't bother setting up a shader for unconverted formats */
4256         ENTER_GL();
4257         glEnable(textype);
4258         checkGLcall("glEnable(textype)");
4259         LEAVE_GL();
4260         return WINED3D_OK;
4261     }
4262
4263     yuv_fixup = get_yuv_fixup(format_desc->color_fixup);
4264
4265     switch(yuv_fixup)
4266     {
4267         case YUV_FIXUP_YUY2:
4268             shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->yuy2_rect_shader : priv->yuy2_2d_shader;
4269             break;
4270
4271         case YUV_FIXUP_UYVY:
4272             shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->uyvy_rect_shader : priv->uyvy_2d_shader;
4273             break;
4274
4275         case YUV_FIXUP_YV12:
4276             shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->yv12_rect_shader : priv->yv12_2d_shader;
4277             break;
4278
4279         default:
4280             FIXME("Unsupported YUV fixup %#x, not setting a shader\n", yuv_fixup);
4281             ENTER_GL();
4282             glEnable(textype);
4283             checkGLcall("glEnable(textype)");
4284             LEAVE_GL();
4285             return E_NOTIMPL;
4286     }
4287
4288     if (!shader) shader = gen_yuv_shader(device, yuv_fixup, textype);
4289
4290     ENTER_GL();
4291     glEnable(GL_FRAGMENT_PROGRAM_ARB);
4292     checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)");
4293     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
4294     checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
4295     GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0, size));
4296     checkGLcall("glProgramLocalParameter4fvARB");
4297     LEAVE_GL();
4298
4299     return WINED3D_OK;
4300 }
4301
4302 static void arbfp_blit_unset(IWineD3DDevice *iface) {
4303     IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) iface;
4304
4305     ENTER_GL();
4306     glDisable(GL_FRAGMENT_PROGRAM_ARB);
4307     checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
4308     glDisable(GL_TEXTURE_2D);
4309     checkGLcall("glDisable(GL_TEXTURE_2D)");
4310     if(GL_SUPPORT(ARB_TEXTURE_CUBE_MAP)) {
4311         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
4312         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
4313     }
4314     if(GL_SUPPORT(ARB_TEXTURE_RECTANGLE)) {
4315         glDisable(GL_TEXTURE_RECTANGLE_ARB);
4316         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
4317     }
4318     LEAVE_GL();
4319 }
4320
4321 static BOOL arbfp_blit_color_fixup_supported(struct color_fixup_desc fixup)
4322 {
4323     enum yuv_fixup yuv_fixup;
4324
4325     if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
4326     {
4327         TRACE("Checking support for fixup:\n");
4328         dump_color_fixup_desc(fixup);
4329     }
4330
4331     if (is_identity_fixup(fixup))
4332     {
4333         TRACE("[OK]\n");
4334         return TRUE;
4335     }
4336
4337     /* We only support YUV conversions. */
4338     if (!is_yuv_fixup(fixup))
4339     {
4340         TRACE("[FAILED]\n");
4341         return FALSE;
4342     }
4343
4344     yuv_fixup = get_yuv_fixup(fixup);
4345     switch(yuv_fixup)
4346     {
4347         case YUV_FIXUP_YUY2:
4348         case YUV_FIXUP_UYVY:
4349         case YUV_FIXUP_YV12:
4350             TRACE("[OK]\n");
4351             return TRUE;
4352
4353         default:
4354             FIXME("Unsupported YUV fixup %#x\n", yuv_fixup);
4355             TRACE("[FAILED]\n");
4356             return FALSE;
4357     }
4358 }
4359
4360 const struct blit_shader arbfp_blit = {
4361     arbfp_blit_alloc,
4362     arbfp_blit_free,
4363     arbfp_blit_set,
4364     arbfp_blit_unset,
4365     arbfp_blit_color_fixup_supported,
4366 };
4367
4368 #undef GLINFO_LOCATION