quartz: Fix discontinuities in wave parser.
[wine] / dlls / wined3d / ati_fragment_shader.c
1 /*
2  * Fixed function pipeline replacement using GL_ATI_fragment_shader
3  *
4  * Copyright 2008 Stefan Dösinger(for CodeWeavers)
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  */
20
21 #include "config.h"
22
23 #include <math.h>
24 #include <stdio.h>
25
26 #include "wined3d_private.h"
27
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
29
/* Some private defines, constant associations, etc.
 *
 * The env bump matrix of stage i and the per stage constants are both placed
 * relative to GL_CON_0_ATI (ATI_FFP_CONST_BUMPMAT(0) and ATI_FFP_CONST_CONSTANT0
 * are the same register). This is considered safe because a stage that
 * bumpmaps can't read the per stage constant.
 */
/* The argument is parenthesized so that expressions like (a << b) or (a | b) expand correctly */
#define ATI_FFP_CONST_BUMPMAT(i) (GL_CON_0_ATI + (i))
#define ATI_FFP_CONST_CONSTANT0 GL_CON_0_ATI
#define ATI_FFP_CONST_CONSTANT1 GL_CON_1_ATI
#define ATI_FFP_CONST_CONSTANT2 GL_CON_2_ATI
#define ATI_FFP_CONST_CONSTANT3 GL_CON_3_ATI
#define ATI_FFP_CONST_CONSTANT4 GL_CON_4_ATI
#define ATI_FFP_CONST_CONSTANT5 GL_CON_5_ATI
/* Constant register used for the D3D texture factor (WINED3DTA_TFACTOR) */
#define ATI_FFP_CONST_TFACTOR   GL_CON_6_ATI
42
43 /* GL_ATI_fragment_shader specific fixed function pipeline description. "Inherits" from the common one */
44 struct atifs_ffp_desc
45 {
46     struct ffp_desc parent;
47     GLuint shader;
48 };
49
50 struct atifs_private_data
51 {
52     struct shader_arb_priv parent;
53     struct list fragment_shaders; /* A linked list to track fragment pipeline replacement shaders */
54
55 };
56
57 static const char *debug_dstmod(GLuint mod) {
58     switch(mod) {
59         case GL_NONE:               return "GL_NONE";
60         case GL_2X_BIT_ATI:         return "GL_2X_BIT_ATI";
61         case GL_4X_BIT_ATI:         return "GL_4X_BIT_ATI";
62         case GL_8X_BIT_ATI:         return "GL_8X_BIT_ATI";
63         case GL_HALF_BIT_ATI:       return "GL_HALF_BIT_ATI";
64         case GL_QUARTER_BIT_ATI:    return "GL_QUARTER_BIT_ATI";
65         case GL_EIGHTH_BIT_ATI:     return "GL_EIGHTH_BIT_ATI";
66         case GL_SATURATE_BIT_ATI:   return "GL_SATURATE_BIT_ATI";
67         default:                    return "Unexpected modifier\n";
68     }
69 }
70
71 static const char *debug_argmod(GLuint mod) {
72     switch(mod) {
73         case GL_NONE:
74             return "GL_NONE";
75
76         case GL_2X_BIT_ATI:
77             return "GL_2X_BIT_ATI";
78         case GL_COMP_BIT_ATI:
79             return "GL_COMP_BIT_ATI";
80         case GL_NEGATE_BIT_ATI:
81             return "GL_NEGATE_BIT_ATI";
82         case GL_BIAS_BIT_ATI:
83             return "GL_BIAS_BIT_ATI";
84
85         case GL_2X_BIT_ATI | GL_COMP_BIT_ATI:
86             return "GL_2X_BIT_ATI | GL_COMP_BIT_ATI";
87         case GL_2X_BIT_ATI | GL_NEGATE_BIT_ATI:
88             return "GL_2X_BIT_ATI | GL_NEGATE_BIT_ATI";
89         case GL_2X_BIT_ATI | GL_BIAS_BIT_ATI:
90             return "GL_2X_BIT_ATI | GL_BIAS_BIT_ATI";
91         case GL_COMP_BIT_ATI | GL_NEGATE_BIT_ATI:
92             return "GL_COMP_BIT_ATI | GL_NEGATE_BIT_ATI";
93         case GL_COMP_BIT_ATI | GL_BIAS_BIT_ATI:
94             return "GL_COMP_BIT_ATI | GL_BIAS_BIT_ATI";
95         case GL_NEGATE_BIT_ATI | GL_BIAS_BIT_ATI:
96             return "GL_NEGATE_BIT_ATI | GL_BIAS_BIT_ATI";
97
98         case GL_COMP_BIT_ATI | GL_NEGATE_BIT_ATI | GL_BIAS_BIT_ATI:
99             return "GL_COMP_BIT_ATI | GL_NEGATE_BIT_ATI | GL_BIAS_BIT_ATI";
100         case GL_2X_BIT_ATI | GL_NEGATE_BIT_ATI | GL_BIAS_BIT_ATI:
101             return "GL_2X_BIT_ATI | GL_NEGATE_BIT_ATI | GL_BIAS_BIT_ATI";
102         case GL_2X_BIT_ATI | GL_COMP_BIT_ATI | GL_BIAS_BIT_ATI:
103             return "GL_2X_BIT_ATI | GL_COMP_BIT_ATI | GL_BIAS_BIT_ATI";
104         case GL_2X_BIT_ATI | GL_COMP_BIT_ATI | GL_NEGATE_BIT_ATI:
105             return "GL_2X_BIT_ATI | GL_COMP_BIT_ATI | GL_NEGATE_BIT_ATI";
106
107         case GL_2X_BIT_ATI | GL_COMP_BIT_ATI | GL_NEGATE_BIT_ATI | GL_BIAS_BIT_ATI:
108             return "GL_2X_BIT_ATI | GL_COMP_BIT_ATI | GL_NEGATE_BIT_ATI | GL_BIAS_BIT_ATI";
109
110         default:
111             return "Unexpected argmod combination\n";
112     }
113 }
114 static const char *debug_register(GLuint reg) {
115     switch(reg) {
116         case GL_REG_0_ATI:                  return "GL_REG_0_ATI";
117         case GL_REG_1_ATI:                  return "GL_REG_1_ATI";
118         case GL_REG_2_ATI:                  return "GL_REG_2_ATI";
119         case GL_REG_3_ATI:                  return "GL_REG_3_ATI";
120         case GL_REG_4_ATI:                  return "GL_REG_4_ATI";
121         case GL_REG_5_ATI:                  return "GL_REG_5_ATI";
122
123         case GL_CON_0_ATI:                  return "GL_CON_0_ATI";
124         case GL_CON_1_ATI:                  return "GL_CON_1_ATI";
125         case GL_CON_2_ATI:                  return "GL_CON_2_ATI";
126         case GL_CON_3_ATI:                  return "GL_CON_3_ATI";
127         case GL_CON_4_ATI:                  return "GL_CON_4_ATI";
128         case GL_CON_5_ATI:                  return "GL_CON_5_ATI";
129         case GL_CON_6_ATI:                  return "GL_CON_6_ATI";
130         case GL_CON_7_ATI:                  return "GL_CON_7_ATI";
131
132         case GL_ZERO:                       return "GL_ZERO";
133         case GL_ONE:                        return "GL_ONE";
134         case GL_PRIMARY_COLOR:              return "GL_PRIMARY_COLOR";
135         case GL_SECONDARY_INTERPOLATOR_ATI: return "GL_SECONDARY_INTERPOLATOR_ATI";
136
137         default:                            return "Unknown register\n";
138     }
139 }
140
141 static const char *debug_swizzle(GLuint swizzle) {
142     switch(swizzle) {
143         case GL_SWIZZLE_STR_ATI:        return "GL_SWIZZLE_STR_ATI";
144         case GL_SWIZZLE_STQ_ATI:        return "GL_SWIZZLE_STQ_ATI";
145         case GL_SWIZZLE_STR_DR_ATI:     return "GL_SWIZZLE_STR_DR_ATI";
146         case GL_SWIZZLE_STQ_DQ_ATI:     return "GL_SWIZZLE_STQ_DQ_ATI";
147         default:                        return "unknown swizzle";
148     }
149 }
150
151 #define GLINFO_LOCATION (*gl_info)
152 static GLuint register_for_arg(DWORD arg, WineD3D_GL_Info *gl_info, unsigned int stage, GLuint *mod, GLuint tmparg) {
153     GLenum ret;
154
155     if(mod) *mod = GL_NONE;
156     if(arg == 0xFFFFFFFF) return -1; /* This is the marker for unused registers */
157
158     switch(arg & WINED3DTA_SELECTMASK) {
159         case WINED3DTA_DIFFUSE:
160             ret = GL_PRIMARY_COLOR;
161             break;
162
163         case WINED3DTA_CURRENT:
164             /* Note that using GL_REG_0_ATI for the passed on register is safe because
165              * texture0 is read at stage0, so in the worst case it is read in the
166              * instruction writing to reg0. Afterwards texture0 is not used any longer.
167              * If we're reading from current
168              */
169             if(stage == 0) {
170                 ret = GL_PRIMARY_COLOR;
171             } else {
172                 ret = GL_REG_0_ATI;
173             }
174             break;
175
176         case WINED3DTA_TEXTURE:
177             ret = GL_REG_0_ATI + stage;
178             break;
179
180         case WINED3DTA_TFACTOR:
181             ret = ATI_FFP_CONST_TFACTOR;
182             break;
183
184         case WINED3DTA_SPECULAR:
185             ret = GL_SECONDARY_INTERPOLATOR_ATI;
186             break;
187
188         case WINED3DTA_TEMP:
189             ret = tmparg;
190             break;
191
192         case WINED3DTA_CONSTANT:
193             FIXME("Unhandled source argument WINED3DTA_TEMP\n");
194             ret = GL_CON_0_ATI;
195             break;
196
197         default:
198             FIXME("Unknown source argument %d\n", arg);
199             ret = GL_ZERO;
200     }
201
202     if(arg & WINED3DTA_COMPLEMENT) {
203         if(mod) *mod |= GL_COMP_BIT_ATI;
204     }
205     if(arg & WINED3DTA_ALPHAREPLICATE) {
206         FIXME("Unhandled read modifier WINED3DTA_ALPHAREPLICATE\n");
207     }
208     return ret;
209 }
210
211 static GLuint find_tmpreg(struct texture_stage_op op[MAX_TEXTURES]) {
212     int lowest_read = -1;
213     int lowest_write = -1;
214     int i;
215     BOOL tex_used[MAX_TEXTURES];
216
217     memset(tex_used, 0, sizeof(tex_used));
218     for(i = 0; i < MAX_TEXTURES; i++) {
219         if(op[i].cop == WINED3DTOP_DISABLE) {
220             break;
221         }
222
223         if(lowest_read == -1 &&
224           (op[i].carg1 == WINED3DTA_TEMP || op[i].carg2 == WINED3DTA_TEMP || op[i].carg0 == WINED3DTA_TEMP ||
225            op[i].aarg1 == WINED3DTA_TEMP || op[i].aarg2 == WINED3DTA_TEMP || op[i].aarg0 == WINED3DTA_TEMP)) {
226             lowest_read = i;
227         }
228
229         if(lowest_write == -1 && op[i].dst == WINED3DTA_TEMP) {
230             lowest_write = i;
231         }
232
233         if(op[i].carg1 == WINED3DTA_TEXTURE || op[i].carg2 == WINED3DTA_TEXTURE || op[i].carg0 == WINED3DTA_TEXTURE ||
234            op[i].aarg1 == WINED3DTA_TEXTURE || op[i].aarg2 == WINED3DTA_TEXTURE || op[i].aarg0 == WINED3DTA_TEXTURE) {
235             tex_used[i] = TRUE;
236         }
237     }
238
239     /* Temp reg not read? We don't need it, return GL_NONE */
240     if(lowest_read == -1) return GL_NONE;
241
242     if(lowest_write >= lowest_read) {
243         FIXME("Temp register read before being written\n");
244     }
245
246     if(lowest_write == -1) {
247         /* This needs a test. Maybe we are supposed to return 0.0/0.0/0.0/0.0, or fail drawprim, or whatever */
248         FIXME("Temp register read without being written\n");
249         return GL_REG_1_ATI;
250     } else if(lowest_write >= 1) {
251         /* If we're writing to the temp reg at earliest in stage 1, we can use register 1 for the temp result.
252          * there may be texture data stored in reg 1, but we do not need it any longer since stage 1 already
253          * read it
254          */
255         return GL_REG_1_ATI;
256     } else {
257         /* Search for a free texture register. We have 6 registers available. GL_REG_0_ATI is already used
258          * for the regular result
259          */
260         for(i = 1; i < 6; i++) {
261             if(!tex_used[i]) {
262                 return GL_REG_0_ATI + i;
263             }
264         }
265         /* What to do here? Report it in ValidateDevice? */
266         FIXME("Could not find a register for the temporary register\n");
267         return 0;
268     }
269 }
270
271 static GLuint gen_ati_shader(struct texture_stage_op op[MAX_TEXTURES], WineD3D_GL_Info *gl_info) {
272     GLuint ret = GL_EXTCALL(glGenFragmentShadersATI(1));
273     unsigned int stage;
274     GLuint arg0, arg1, arg2, extrarg;
275     GLuint dstmod, argmod0, argmod1, argmod2, argmodextra;
276     GLuint swizzle;
277     GLuint tmparg = find_tmpreg(op);
278     GLuint dstreg;
279
280     if(!ret) {
281         ERR("Failed to generate a GL_ATI_fragment_shader shader id\n");
282         return 0;
283     }
284     GL_EXTCALL(glBindFragmentShaderATI(ret));
285     checkGLcall("GL_EXTCALL(glBindFragmentShaderATI(ret))");
286
287     TRACE("glBeginFragmentShaderATI()\n");
288     GL_EXTCALL(glBeginFragmentShaderATI());
289     checkGLcall("GL_EXTCALL(glBeginFragmentShaderATI())");
290
291     /* Pass 1: Generate sampling instructions for perturbation maps */
292       for(stage = 0; stage < GL_LIMITS(textures); stage++) {
293         if(op[stage].cop == WINED3DTOP_DISABLE) break;
294         if(op[stage].cop != WINED3DTOP_BUMPENVMAP &&
295            op[stage].cop != WINED3DTOP_BUMPENVMAPLUMINANCE) continue;
296
297         TRACE("glSampleMapATI(GL_REG_%d_ATI, GL_TEXTURE_%d_ARB, GL_SWIZZLE_STR_ATI)\n",
298               stage, stage);
299         GL_EXTCALL(glSampleMapATI(GL_REG_0_ATI + stage,
300                    GL_TEXTURE0_ARB + stage,
301                    GL_SWIZZLE_STR_ATI));
302         TRACE("glPassTexCoordATI(GL_REG_%d_ATI, GL_TEXTURE_%d_ARB, GL_SWIZZLE_STR_ATI)\n",
303               stage + 1, stage + 1);
304         GL_EXTCALL(glPassTexCoordATI(GL_REG_0_ATI + stage + 1,
305                    GL_TEXTURE0_ARB + stage + 1,
306                    GL_SWIZZLE_STR_ATI));
307
308         /* We need GL_REG_5_ATI as a temporary register to swizzle the bump matrix. So we run into
309          * issues if we're bump mapping on stage 4 or 5
310          */
311         if(stage >= 4) {
312             FIXME("Bump mapping in stage %d\n", stage);
313         }
314     }
315
316     /* Pass 2: Generate perturbation calculations */
317     for(stage = 0; stage < GL_LIMITS(textures); stage++) {
318         if(op[stage].cop == WINED3DTOP_DISABLE) break;
319         if(op[stage].cop != WINED3DTOP_BUMPENVMAP &&
320            op[stage].cop != WINED3DTOP_BUMPENVMAPLUMINANCE) continue;
321
322         /* Nice thing, we get the color correction for free :-) */
323         if(op[stage].color_correction == WINED3DFMT_V8U8) {
324             argmodextra = GL_2X_BIT_ATI | GL_BIAS_BIT_ATI;
325         } else {
326             argmodextra = 0;
327         }
328
329         TRACE("glColorFragmentOp3ATI(GL_DOT2_ADD_ATI, GL_REG_%d_ATI, GL_RED_BIT_ATI, GL_NONE, GL_REG_%d_ATI, GL_NONE, %s, ATI_FFP_CONST_BUMPMAT(%d), GL_NONE, GL_NONE, GL_REG_%d_ATI, GL_RED, GL_NONE)\n",
330               stage + 1, stage, debug_argmod(argmodextra), stage, stage + 1);
331         GL_EXTCALL(glColorFragmentOp3ATI(GL_DOT2_ADD_ATI, GL_REG_0_ATI + stage + 1, GL_RED_BIT_ATI, GL_NONE,
332                                          GL_REG_0_ATI + stage, GL_NONE, argmodextra,
333                                          ATI_FFP_CONST_BUMPMAT(stage), GL_NONE, GL_2X_BIT_ATI | GL_BIAS_BIT_ATI,
334                                          GL_REG_0_ATI + stage + 1, GL_RED, GL_NONE));
335
336         /* FIXME: How can I make GL_DOT2_ADD_ATI read the factors from blue and alpha? It defaults to red and green,
337          * and it is fairly easy to make it read GL_BLUE or GL_ALPHA, but I can't get an R * B + G * A. So we're wasting
338          * one register and two instructions in this pass for a simple swizzling operation.
339          * For starters it might be good enough to merge the two movs into one, but even that isn't possible :-(
340          *
341          * NOTE: GL_BLUE | GL_ALPHA is not possible. It doesn't throw a compilation error, but an OR operation on the
342          * constants doesn't make sense, considering their values.
343          */
344         TRACE("glColorFragmentOp1ATI(GL_MOV_ATI, GL_REG_5_ATI, GL_RED_BIT_ATI, GL_NONE, ATI_FFP_CONST_BUMPMAT(%d), GL_BLUE, GL_NONE)\n", stage);
345         GL_EXTCALL(glColorFragmentOp1ATI(GL_MOV_ATI, GL_REG_5_ATI, GL_RED_BIT_ATI, GL_NONE,
346                                          ATI_FFP_CONST_BUMPMAT(stage), GL_BLUE, GL_NONE));
347         TRACE("glColorFragmentOp1ATI(GL_MOV_ATI, GL_REG_5_ATI, GL_GREEN_BIT_ATI, GL_NONE, ATI_FFP_CONST_BUMPMAT(%d), GL_ALPHA, GL_NONE)\n", stage);
348         GL_EXTCALL(glColorFragmentOp1ATI(GL_MOV_ATI, GL_REG_5_ATI, GL_GREEN_BIT_ATI, GL_NONE,
349                                         ATI_FFP_CONST_BUMPMAT(stage), GL_ALPHA, GL_NONE));
350         TRACE("glColorFragmentOp3ATI(GL_DOT2_ADD_ATI, GL_REG_%d_ATI, GL_GREEN_BIT_ATI, GL_NONE, GL_REG_%d_ATI, GL_NONE, %s, GL_REG_5_ATI, GL_NONE, GL_NONE, GL_REG_%d_ATI, GL_GREEN, GL_NONE)\n",
351               stage + 1, stage, debug_argmod(argmodextra), stage + 1);
352         GL_EXTCALL(glColorFragmentOp3ATI(GL_DOT2_ADD_ATI, GL_REG_0_ATI + stage + 1, GL_GREEN_BIT_ATI, GL_NONE,
353                                          GL_REG_0_ATI + stage, GL_NONE, argmodextra,
354                                          GL_REG_5_ATI, GL_NONE, GL_2X_BIT_ATI | GL_BIAS_BIT_ATI,
355                                          GL_REG_0_ATI + stage + 1, GL_GREEN, GL_NONE));
356     }
357
358     /* Pass 3: Generate sampling instructions for regular textures */
359     for(stage = 0; stage < GL_LIMITS(textures); stage++) {
360         if(op[stage].cop == WINED3DTOP_DISABLE) {
361             break;
362         }
363
364         if(op[stage].projected == proj_none) {
365             swizzle = GL_SWIZZLE_STR_ATI;
366         } else if(op[stage].projected == proj_count3) {
367             /* TODO: D3DTTFF_COUNT3 | D3DTTFF_PROJECTED would be GL_SWIZZLE_STR_DR_ATI.
368              * However, the FFP vertex processing texture transform matrix handler does
369              * some transformations in the texture matrix which makes the 3rd coordinate
370              * arrive in Q, not R in that case. This is needed for opengl fixed function
371              * fragment processing which always divides by Q. In this backend we can
372              * handle that properly and be compatible with vertex shader output and avoid
373              * side effects of the texture matrix games
374              */
375             swizzle = GL_SWIZZLE_STQ_DQ_ATI;
376         } else {
377             swizzle = GL_SWIZZLE_STQ_DQ_ATI;
378         }
379
380         if((op[stage].carg0 & WINED3DTA_SELECTMASK) == WINED3DTA_TEXTURE ||
381            (op[stage].carg1 & WINED3DTA_SELECTMASK) == WINED3DTA_TEXTURE ||
382            (op[stage].carg2 & WINED3DTA_SELECTMASK) == WINED3DTA_TEXTURE ||
383            (op[stage].aarg0 & WINED3DTA_SELECTMASK) == WINED3DTA_TEXTURE ||
384            (op[stage].aarg1 & WINED3DTA_SELECTMASK) == WINED3DTA_TEXTURE ||
385            (op[stage].aarg2 & WINED3DTA_SELECTMASK) == WINED3DTA_TEXTURE ||
386             op[stage].cop == WINED3DTOP_BLENDTEXTUREALPHA) {
387
388             if(stage > 0 &&
389                (op[stage - 1].cop == WINED3DTOP_BUMPENVMAP ||
390                 op[stage - 1].cop == WINED3DTOP_BUMPENVMAPLUMINANCE)) {
391                 TRACE("glSampleMapATI(GL_REG_%d_ATI, GL_REG_%d_ATI, GL_SWIZZLE_STR_ATI)\n",
392                       stage, stage);
393                 GL_EXTCALL(glSampleMapATI(GL_REG_0_ATI + stage,
394                            GL_REG_0_ATI + stage,
395                            GL_SWIZZLE_STR_ATI));
396             } else {
397                 TRACE("glSampleMapATI(GL_REG_%d_ATI, GL_TEXTURE_%d_ARB, %s)\n",
398                     stage, stage, debug_swizzle(swizzle));
399                 GL_EXTCALL(glSampleMapATI(GL_REG_0_ATI + stage,
400                                         GL_TEXTURE0_ARB + stage,
401                                         swizzle));
402             }
403         }
404     }
405
406     /* Pass 4: Generate the arithmetic instructions */
407     for(stage = 0; stage < MAX_TEXTURES; stage++) {
408         if(op[stage].cop == WINED3DTOP_DISABLE) {
409             if(stage == 0) {
410                 /* Handle complete texture disabling gracefully */
411                 TRACE("glColorFragmentOp1ATI(GL_MOV_ATI, GL_REG_0_ATI, GL_NONE, GL_NONE, GL_PRIMARY_COLOR, GL_NONE, GL_NONE)\n");
412                 GL_EXTCALL(glColorFragmentOp1ATI(GL_MOV_ATI, GL_REG_0_ATI, GL_NONE, GL_NONE,
413                                                  GL_PRIMARY_COLOR, GL_NONE, GL_NONE));
414                 TRACE("glAlphaFragmentOp1ATI(GL_MOV_ATI, GL_REG_0_ATI, GL_NONE, GL_PRIMARY_COLOR, GL_NONE, GL_NONE)\n");
415                 GL_EXTCALL(glAlphaFragmentOp1ATI(GL_MOV_ATI, GL_REG_0_ATI, GL_NONE,
416                                                  GL_PRIMARY_COLOR, GL_NONE, GL_NONE));
417             }
418             break;
419         }
420
421         if(op[stage].dst == WINED3DTA_TEMP) {
422             /* If we're writing to D3DTA_TEMP, but never reading from it we don't have to write there in the first place.
423              * skip the entire stage, this saves some GPU time
424              */
425             if(tmparg == GL_NONE) continue;
426
427             dstreg = tmparg;
428         } else {
429             dstreg = GL_REG_0_ATI;
430         }
431
432         arg0 = register_for_arg(op[stage].carg0, gl_info, stage, &argmod0, tmparg);
433         arg1 = register_for_arg(op[stage].carg1, gl_info, stage, &argmod1, tmparg);
434         arg2 = register_for_arg(op[stage].carg2, gl_info, stage, &argmod2, tmparg);
435         dstmod = GL_NONE;
436         argmodextra = GL_NONE;
437         extrarg = GL_NONE;
438
439         switch(op[stage].cop) {
440             case WINED3DTOP_SELECTARG2:
441                 arg1 = arg2;
442                 argmod1 = argmod2;
443             case WINED3DTOP_SELECTARG1:
444                 TRACE("glColorFragmentOp1ATI(GL_MOV_ATI, %s, GL_NONE, GL_NONE, %s, GL_NONE, %s)\n",
445                       debug_register(dstreg), debug_register(arg1), debug_argmod(argmod1));
446                 GL_EXTCALL(glColorFragmentOp1ATI(GL_MOV_ATI, dstreg, GL_NONE, GL_NONE,
447                                                  arg1, GL_NONE, argmod1));
448                 break;
449
450             case WINED3DTOP_MODULATE4X:
451                 if(dstmod == GL_NONE) dstmod = GL_4X_BIT_ATI;
452             case WINED3DTOP_MODULATE2X:
453                 if(dstmod == GL_NONE) dstmod = GL_2X_BIT_ATI;
454             case WINED3DTOP_MODULATE:
455                 TRACE("glColorFragmentOp2ATI(GL_MUL_ATI, %s, GL_NONE, %s, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
456                       debug_register(dstreg), debug_dstmod(dstmod),
457                       debug_register(arg1), debug_argmod(argmod1),
458                       debug_register(arg2), debug_argmod(argmod2));
459                 GL_EXTCALL(glColorFragmentOp2ATI(GL_MUL_ATI, dstreg, GL_NONE, dstmod,
460                                                  arg1, GL_NONE, argmod1,
461                                                  arg2, GL_NONE, argmod2));
462                 break;
463
464             case WINED3DTOP_ADDSIGNED2X:
465                 dstmod = GL_2X_BIT_ATI;
466             case WINED3DTOP_ADDSIGNED:
467                 argmodextra = GL_BIAS_BIT_ATI;
468             case WINED3DTOP_ADD:
469                 TRACE("glColorFragmentOp2ATI(GL_ADD_ATI, %s, GL_NONE, %s, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
470                       debug_register(dstreg), debug_dstmod(dstmod),
471                       debug_register(arg1), debug_argmod(argmod1),
472                       debug_register(arg2), debug_argmod(argmodextra | argmod2));
473                 GL_EXTCALL(glColorFragmentOp2ATI(GL_ADD_ATI, GL_REG_0_ATI, GL_NONE, dstmod,
474                                                  arg1, GL_NONE, argmod1,
475                                                  arg2, GL_NONE, argmodextra | argmod2));
476                 break;
477
478             case WINED3DTOP_SUBTRACT:
479                 TRACE("glColorFragmentOp2ATI(GL_SUB_ATI, %s, GL_NONE, %s, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
480                       debug_register(dstreg), debug_dstmod(dstmod),
481                       debug_register(arg1), debug_argmod(argmod1),
482                       debug_register(arg2), debug_argmod(argmod2));
483                 GL_EXTCALL(glColorFragmentOp2ATI(GL_SUB_ATI, dstreg, GL_NONE, dstmod,
484                                                  arg1, GL_NONE, argmod1,
485                                                  arg2, GL_NONE, argmod2));
486                 break;
487
488             case WINED3DTOP_ADDSMOOTH:
489                 argmodextra = argmod1 & GL_COMP_BIT_ATI ? argmod1 & ~GL_COMP_BIT_ATI : argmod1 | GL_COMP_BIT_ATI;
490                 TRACE("glColorFragmentOp3ATI(GL_MAD_ATI, %s, GL_NONE, GL_NONE, %s, GL_NONE, %s, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
491                       debug_register(dstreg),
492                       debug_register(arg2), debug_argmod(argmod2),
493                       debug_register(arg1), debug_argmod(argmodextra),
494                       debug_register(arg1), debug_argmod(argmod1));
495                 /* Dst = arg1 + arg2 * (1 - arg1)
496                  *     = arg2 * (1 - arg1) + arg1
497                  */
498                 GL_EXTCALL(glColorFragmentOp3ATI(GL_MAD_ATI, dstreg, GL_NONE, GL_NONE,
499                                                  arg2, GL_NONE, argmod2,
500                                                  arg1, GL_NONE, argmodextra,
501                                                  arg1, GL_NONE, argmod1));
502                 break;
503
504             case WINED3DTOP_BLENDCURRENTALPHA:
505                 if(extrarg == GL_NONE) extrarg = register_for_arg(WINED3DTA_CURRENT, gl_info, stage, NULL, -1);
506             case WINED3DTOP_BLENDFACTORALPHA:
507                 if(extrarg == GL_NONE) extrarg = register_for_arg(WINED3DTA_TFACTOR, gl_info, stage, NULL, -1);
508             case WINED3DTOP_BLENDTEXTUREALPHA:
509                 if(extrarg == GL_NONE) extrarg = register_for_arg(WINED3DTA_TEXTURE, gl_info, stage, NULL, -1);
510             case WINED3DTOP_BLENDDIFFUSEALPHA:
511                 if(extrarg == GL_NONE) extrarg = register_for_arg(WINED3DTA_DIFFUSE, gl_info, stage, NULL, -1);
512                 TRACE("glColorFragmentOp3ATI(GL_LERP_ATI, %s, GL_NONE, GL_NONE, %s, GL_ALPHA, GL_NONE, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
513                       debug_register(dstreg),
514                       debug_register(extrarg),
515                       debug_register(arg1), debug_argmod(argmod1),
516                       debug_register(arg2), debug_argmod(argmod2));
517                 GL_EXTCALL(glColorFragmentOp3ATI(GL_LERP_ATI, dstreg, GL_NONE, GL_NONE,
518                                                  extrarg, GL_ALPHA, GL_NONE,
519                                                  arg1, GL_NONE, argmod1,
520                                                  arg2, GL_NONE, argmod2));
521                 break;
522
523             case WINED3DTOP_BLENDTEXTUREALPHAPM:
524                 arg0 = register_for_arg(WINED3DTA_TEXTURE, gl_info, stage, NULL, -1);
525                 TRACE("glColorFragmentOp3ATI(GL_MAD_ATI, %s, GL_NONE, GL_NONE, %s, GL_NONE, %s, %s, GL_ALPHA, GL_COMP_BIT_ATI, %s, GL_NONE, %s)\n",
526                       debug_register(dstreg),
527                       debug_register(arg2), debug_argmod(argmod2),
528                       debug_register(arg0),
529                       debug_register(arg1), debug_argmod(argmod1));
530                 GL_EXTCALL(glColorFragmentOp3ATI(GL_MAD_ATI, dstreg, GL_NONE, GL_NONE,
531                                                  arg2, GL_NONE,  argmod2,
532                                                  arg0, GL_ALPHA, GL_COMP_BIT_ATI,
533                                                  arg1, GL_NONE,  argmod1));
534                 break;
535
536             /* D3DTOP_PREMODULATE ???? */
537
538             case WINED3DTOP_MODULATEINVALPHA_ADDCOLOR:
539                 argmodextra = argmod1 & GL_COMP_BIT_ATI ? argmod1 & ~GL_COMP_BIT_ATI : argmod1 | GL_COMP_BIT_ATI;
540             case WINED3DTOP_MODULATEALPHA_ADDCOLOR:
541                 if(!argmodextra) argmodextra = argmod1;
542                 TRACE("glColorFragmentOp3ATI(GL_MAD_ATI, %s, GL_NONE, GL_NONE, %s, GL_NONE, %s, %s, GL_ALPHA, %s, %s, GL_NONE, %s)\n",
543                       debug_register(dstreg),
544                       debug_register(arg2), debug_argmod(argmod2),
545                       debug_register(arg1), debug_argmod(argmodextra), debug_register(arg1), debug_argmod(arg1));
546                 GL_EXTCALL(glColorFragmentOp3ATI(GL_MAD_ATI, dstreg, GL_NONE, GL_NONE,
547                                                  arg2, GL_NONE,  argmod2,
548                                                  arg1, GL_ALPHA, argmodextra,
549                                                  arg1, GL_NONE,  argmod1));
550                 break;
551
552             case WINED3DTOP_MODULATEINVCOLOR_ADDALPHA:
553                 argmodextra = argmod1 & GL_COMP_BIT_ATI ? argmod1 & ~GL_COMP_BIT_ATI : argmod1 | GL_COMP_BIT_ATI;
554             case WINED3DTOP_MODULATECOLOR_ADDALPHA:
555                 if(!argmodextra) argmodextra = argmod1;
556                 TRACE("glColorFragmentOp3ATI(GL_MAD_ATI, %s, GL_NONE, GL_NONE, %s, GL_NONE, %s, %s, GL_NONE, %s, %s, GL_ALPHA, %s)\n",
557                       debug_register(dstreg),
558                       debug_register(arg2), debug_argmod(argmod2),
559                       debug_register(arg1), debug_argmod(argmodextra),
560                       debug_register(arg1), debug_argmod(argmod1));
561                 GL_EXTCALL(glColorFragmentOp3ATI(GL_MAD_ATI, dstreg, GL_NONE, GL_NONE,
562                                                  arg2, GL_NONE,  argmod2,
563                                                  arg1, GL_NONE,  argmodextra,
564                                                  arg1, GL_ALPHA, argmod1));
565                 break;
566
567             case WINED3DTOP_DOTPRODUCT3:
568                 TRACE("glColorFragmentOp2ATI(GL_DOT3_ATI, %s, GL_NONE, GL_4X_BIT_ATI, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
569                       debug_register(dstreg),
570                       debug_register(arg1), debug_argmod(argmod1 | GL_BIAS_BIT_ATI),
571                       debug_register(arg2), debug_argmod(argmod2 | GL_BIAS_BIT_ATI));
572                 GL_EXTCALL(glColorFragmentOp2ATI(GL_DOT3_ATI, dstreg, GL_NONE, GL_4X_BIT_ATI,
573                                                  arg1, GL_NONE, argmod1 | GL_BIAS_BIT_ATI,
574                                                  arg2, GL_NONE, argmod2 | GL_BIAS_BIT_ATI));
575                 break;
576
577             case WINED3DTOP_MULTIPLYADD:
578                 TRACE("glColorFragmentOp3ATI(GL_MAD_ATI, %s, GL_NONE, GL_NONE, %s, GL_NONE, %s, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
579                       debug_register(dstreg),
580                       debug_register(arg0), debug_argmod(argmod0),
581                       debug_register(arg2), debug_argmod(argmod2),
582                       debug_register(arg1), debug_argmod(argmod1));
583                 GL_EXTCALL(glColorFragmentOp3ATI(GL_MAD_ATI, dstreg, GL_NONE, GL_NONE,
584                                                  arg0, GL_NONE, argmod0,
585                                                  arg2, GL_NONE, argmod2,
586                                                  arg1, GL_NONE, argmod1));
587                 break;
588
589             case WINED3DTOP_LERP:
590                 TRACE("glColorFragmentOp3ATI(GL_LERP_ATI, %s, GL_NONE, GL_NONE, %s, GL_NONE, %s, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
591                       debug_register(dstreg),
592                       debug_register(arg1), debug_argmod(argmod1),
593                       debug_register(arg2), debug_argmod(argmod2),
594                       debug_register(arg0), debug_argmod(argmod0));
595                 GL_EXTCALL(glColorFragmentOp3ATI(GL_LERP_ATI, dstreg, GL_NONE, GL_NONE,
596                                                  arg1, GL_NONE, argmod1,
597                                                  arg2, GL_NONE, argmod2,
598                                                  arg0, GL_NONE, argmod0));
599                 break;
600
601             case WINED3DTOP_BUMPENVMAP:
602             case WINED3DTOP_BUMPENVMAPLUMINANCE:
603                 /* Those are handled in the first pass of the shader (generation passes 1 and 2) already */
604                 break;
605
606             default: FIXME("Unhandled color operation %d on stage %d\n", op[stage].cop, stage);
607         }
608
609         arg0 = register_for_arg(op[stage].aarg0, gl_info, stage, &argmod0, tmparg);
610         arg1 = register_for_arg(op[stage].aarg1, gl_info, stage, &argmod1, tmparg);
611         arg2 = register_for_arg(op[stage].aarg2, gl_info, stage, &argmod2, tmparg);
612         dstmod = GL_NONE;
613         argmodextra = GL_NONE;
614         extrarg = GL_NONE;
615
616         switch(op[stage].aop) {
617             case WINED3DTOP_DISABLE:
618                 /* Get the primary color to the output if on stage 0, otherwise leave register 0 untouched */
619                 if(stage == 0) {
620                     TRACE("glAlphaFragmentOp1ATI(GL_MOV_ATI, GL_REG_0_ATI, GL_NONE, GL_PRIMARY_COLOR, GL_NONE, GL_NONE)\n");
621                     GL_EXTCALL(glAlphaFragmentOp1ATI(GL_MOV_ATI, GL_REG_0_ATI, GL_NONE,
622                                GL_PRIMARY_COLOR, GL_NONE, GL_NONE));
623                 }
624                 break;
625
626             case WINED3DTOP_SELECTARG2:
627                 arg1 = arg2;
628                 argmod1 = argmod2;
629             case WINED3DTOP_SELECTARG1:
630                 TRACE("glAlphaFragmentOp1ATI(GL_MOV_ATI, %s,          GL_NONE, %s, GL_NONE, %s)\n",
631                       debug_register(dstreg),
632                       debug_register(arg1), debug_argmod(argmod1));
633                 GL_EXTCALL(glAlphaFragmentOp1ATI(GL_MOV_ATI, dstreg, GL_NONE,
634                                                  arg1, GL_NONE, argmod1));
635                 break;
636
637             case WINED3DTOP_MODULATE4X:
638                 if(dstmod == GL_NONE) dstmod = GL_4X_BIT_ATI;
639             case WINED3DTOP_MODULATE2X:
640                 if(dstmod == GL_NONE) dstmod = GL_2X_BIT_ATI;
641             case WINED3DTOP_MODULATE:
642                 TRACE("glAlphaFragmentOp2ATI(GL_MUL_ATI, %s,          %s, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
643                       debug_register(dstreg), debug_dstmod(dstmod),
644                       debug_register(arg1), debug_argmod(argmod1),
645                       debug_register(arg2), debug_argmod(argmod2));
646                 GL_EXTCALL(glAlphaFragmentOp2ATI(GL_MUL_ATI, dstreg, dstmod,
647                                                  arg1, GL_NONE, argmod1,
648                                                  arg2, GL_NONE, argmod2));
649                 break;
650
651             case WINED3DTOP_ADDSIGNED2X:
652                 dstmod = GL_2X_BIT_ATI;
653             case WINED3DTOP_ADDSIGNED:
654                 argmodextra = GL_BIAS_BIT_ATI;
655             case WINED3DTOP_ADD:
656                 TRACE("glAlphaFragmentOp2ATI(GL_ADD_ATI, %s,          %s, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
657                       debug_register(dstreg), debug_dstmod(dstmod),
658                       debug_register(arg1), debug_argmod(argmod1),
659                       debug_register(arg2), debug_argmod(argmodextra | argmod2));
660                 GL_EXTCALL(glAlphaFragmentOp2ATI(GL_ADD_ATI, dstreg, dstmod,
661                                                  arg1, GL_NONE, argmod1,
662                                                  arg2, GL_NONE, argmodextra | argmod2));
663                 break;
664
665             case WINED3DTOP_SUBTRACT:
666                 TRACE("glAlphaFragmentOp2ATI(GL_SUB_ATI, %s, GL_NONE, %s, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
667                       debug_register(dstreg), debug_dstmod(dstmod),
668                       debug_register(arg1), debug_argmod(argmod1),
669                       debug_register(arg2), debug_argmod(argmod2));
670                 GL_EXTCALL(glAlphaFragmentOp2ATI(GL_SUB_ATI, dstreg, dstmod,
671                                                  arg1, GL_NONE, argmod1,
672                                                  arg2, GL_NONE, argmod2));
673                 break;
674
675             case WINED3DTOP_ADDSMOOTH:
676                 argmodextra = argmod1 & GL_COMP_BIT_ATI ? argmod1 & ~GL_COMP_BIT_ATI : argmod1 | GL_COMP_BIT_ATI;
677                 TRACE("glAlphaFragmentOp3ATI(GL_MAD_ATI, %s,          GL_NONE, %s, GL_NONE, %s, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
678                       debug_register(dstreg),
679                       debug_register(arg2), debug_argmod(argmod2),
680                       debug_register(arg1), debug_argmod(argmodextra),
681                       debug_register(arg1), debug_argmod(argmod1));
682                 /* Dst = arg1 + * arg2(1 -arg 1)
683                  *     = arg2 * (1 - arg1) + arg1
684                  */
685                 GL_EXTCALL(glAlphaFragmentOp3ATI(GL_MAD_ATI, dstreg, GL_NONE,
686                                                  arg2, GL_NONE, argmod2,
687                                                  arg1, GL_NONE, argmodextra,
688                                                  arg1, GL_NONE, argmod1));
689                 break;
690
691             case WINED3DTOP_BLENDCURRENTALPHA:
692                 if(extrarg == GL_NONE) extrarg = register_for_arg(WINED3DTA_CURRENT, gl_info, stage, NULL, -1);
693             case WINED3DTOP_BLENDFACTORALPHA:
694                 if(extrarg == GL_NONE) extrarg = register_for_arg(WINED3DTA_TFACTOR, gl_info, stage, NULL, -1);
695             case WINED3DTOP_BLENDTEXTUREALPHA:
696                 if(extrarg == GL_NONE) extrarg = register_for_arg(WINED3DTA_TEXTURE, gl_info, stage, NULL, -1);
697             case WINED3DTOP_BLENDDIFFUSEALPHA:
698                 if(extrarg == GL_NONE) extrarg = register_for_arg(WINED3DTA_DIFFUSE, gl_info, stage, NULL, -1);
699                 TRACE("glAlphaFragmentOp3ATI(GL_LERP_ATI, %s,          GL_NONE, %s, GL_ALPHA, GL_NONE, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
700                       debug_register(dstreg),
701                       debug_register(extrarg),
702                       debug_register(arg1), debug_argmod(argmod1),
703                       debug_register(arg2), debug_argmod(argmod2));
704                 GL_EXTCALL(glAlphaFragmentOp3ATI(GL_LERP_ATI, dstreg, GL_NONE,
705                                                  extrarg, GL_ALPHA, GL_NONE,
706                                                  arg1, GL_NONE, argmod1,
707                                                  arg2, GL_NONE, argmod2));
708                 break;
709
710             case WINED3DTOP_BLENDTEXTUREALPHAPM:
711                 arg0 = register_for_arg(WINED3DTA_TEXTURE, gl_info, stage, NULL, -1);
712                 TRACE("glAlphaFragmentOp3ATI(GL_MAD_ATI, %s,          GL_NONE, %s, GL_NONE, %s, %s, GL_ALPHA, GL_COMP_BIT_ATI, %s, GL_NONE, %s)\n",
713                       debug_register(dstreg),
714                       debug_register(arg2), debug_argmod(argmod2),
715                       debug_register(arg0),
716                       debug_register(arg1), debug_argmod(argmod1));
717                 GL_EXTCALL(glAlphaFragmentOp3ATI(GL_MAD_ATI, dstreg, GL_NONE,
718                                                  arg2, GL_NONE,  argmod2,
719                                                  arg0, GL_ALPHA, GL_COMP_BIT_ATI,
720                                                  arg1, GL_NONE,  argmod1));
721                 break;
722
723             /* D3DTOP_PREMODULATE ???? */
724
725             case WINED3DTOP_DOTPRODUCT3:
726                 TRACE("glAlphaFragmentOp2ATI(GL_DOT3_ATI, %s, GL_NONE, GL_4X_BIT_ATI, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
727                       debug_register(dstreg),
728                       debug_register(arg1), debug_argmod(argmod1 | GL_BIAS_BIT_ATI),
729                       debug_register(arg2), debug_argmod(argmod2 | GL_BIAS_BIT_ATI));
730                 GL_EXTCALL(glAlphaFragmentOp2ATI(GL_DOT3_ATI, dstreg, GL_4X_BIT_ATI,
731                                                  arg1, GL_NONE, argmod1 | GL_BIAS_BIT_ATI,
732                                                  arg2, GL_NONE, argmod2 | GL_BIAS_BIT_ATI));
733                 break;
734
735             case WINED3DTOP_MULTIPLYADD:
736                 TRACE("glAlphaFragmentOp3ATI(GL_MAD_ATI, %s,          GL_NONE, %s, GL_NONE, %s, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
737                       debug_register(dstreg),
738                       debug_register(arg0), debug_argmod(argmod0),
739                       debug_register(arg2), debug_argmod(argmod2),
740                       debug_register(arg1), debug_argmod(argmod1));
741                 GL_EXTCALL(glAlphaFragmentOp3ATI(GL_MAD_ATI, dstreg,          GL_NONE,
742                            arg0, GL_NONE, argmod0,
743                            arg2, GL_NONE, argmod2,
744                            arg1, GL_NONE, argmod1));
745                 break;
746
747             case WINED3DTOP_LERP:
748                 TRACE("glAlphaFragmentOp3ATI(GL_LERP_ATI, %s,          GL_NONE, %s, GL_NONE, %s, %s, GL_NONE, %s, %s, GL_NONE, %s)\n",
749                       debug_register(dstreg),
750                       debug_register(arg1), debug_argmod(argmod1),
751                       debug_register(arg2), debug_argmod(argmod2),
752                       debug_register(arg0), debug_argmod(argmod0));
753                 GL_EXTCALL(glAlphaFragmentOp3ATI(GL_LERP_ATI, dstreg, GL_NONE,
754                                                  arg1, GL_NONE, argmod1,
755                                                  arg2, GL_NONE, argmod2,
756                                                  arg0, GL_NONE, argmod0));
757                 break;
758
759             case WINED3DTOP_MODULATEINVALPHA_ADDCOLOR:
760             case WINED3DTOP_MODULATEALPHA_ADDCOLOR:
761             case WINED3DTOP_MODULATECOLOR_ADDALPHA:
762             case WINED3DTOP_MODULATEINVCOLOR_ADDALPHA:
763             case WINED3DTOP_BUMPENVMAP:
764             case WINED3DTOP_BUMPENVMAPLUMINANCE:
765                 ERR("Application uses an invalid alpha operation\n");
766                 break;
767
768             default: FIXME("Unhandled alpha operation %d on stage %d\n", op[stage].aop, stage);
769         }
770     }
771
772     TRACE("glEndFragmentShaderATI()\n");
773     GL_EXTCALL(glEndFragmentShaderATI());
774     checkGLcall("GL_EXTCALL(glEndFragmentShaderATI())");
775     return ret;
776 }
777 #undef GLINFO_LOCATION
778
779 #define GLINFO_LOCATION stateblock->wineD3DDevice->adapter->gl_info
780 static void set_tex_op_atifs(DWORD state, IWineD3DStateBlockImpl *stateblock, WineD3DContext *context) {
781     IWineD3DDeviceImpl          *This = stateblock->wineD3DDevice;
782     struct atifs_ffp_desc       *desc;
783     struct texture_stage_op     op[MAX_TEXTURES];
784     struct atifs_private_data   *priv = (struct atifs_private_data *) This->shader_priv;
785
786     gen_ffp_op(stateblock, op);
787     desc = (struct atifs_ffp_desc *) find_ffp_shader(&priv->fragment_shaders, op);
788     if(!desc) {
789         desc = HeapAlloc(GetProcessHeap(), 0, sizeof(*desc));
790         if(!desc) {
791             ERR("Out of memory\n");
792             return;
793         }
794         memcpy(desc->parent.op, op, sizeof(op));
795         desc->shader = gen_ati_shader(op, &GLINFO_LOCATION);
796         add_ffp_shader(&priv->fragment_shaders, &desc->parent);
797         TRACE("Allocated fixed function replacement shader descriptor %p\n", desc);
798     }
799
800     GL_EXTCALL(glBindFragmentShaderATI(desc->shader));
801 }
802
803 static void state_texfactor_atifs(DWORD state, IWineD3DStateBlockImpl *stateblock, WineD3DContext *context) {
804     float col[4];
805     D3DCOLORTOGLFLOAT4(stateblock->renderState[WINED3DRS_TEXTUREFACTOR], col);
806
807     GL_EXTCALL(glSetFragmentShaderConstantATI(ATI_FFP_CONST_TFACTOR, col));
808     checkGLcall("glSetFragmentShaderConstantATI(ATI_FFP_CONST_TFACTOR, col)");
809 }
810
811 static void set_bumpmat(DWORD state, IWineD3DStateBlockImpl *stateblock, WineD3DContext *context) {
812     DWORD stage = (state - STATE_TEXTURESTAGE(0, 0)) / WINED3D_HIGHEST_TEXTURE_STATE;
813     float mat[2][2];
814
815     mat[0][0] = *((float *) &stateblock->textureState[stage][WINED3DTSS_BUMPENVMAT00]);
816     mat[1][0] = *((float *) &stateblock->textureState[stage][WINED3DTSS_BUMPENVMAT01]);
817     mat[0][1] = *((float *) &stateblock->textureState[stage][WINED3DTSS_BUMPENVMAT10]);
818     mat[1][1] = *((float *) &stateblock->textureState[stage][WINED3DTSS_BUMPENVMAT11]);
819     /* GL_ATI_fragment_shader allows only constants from 0.0 to 1.0, but the bumpmat
820      * constants can be in any range. While they should stay between [-1.0 and 1.0] because
821      * Shader Model 1.x pixel shaders are clamped to that range negative values are used occasionally,
822      * for example by our d3d9 test. So to get negative values scale -1;1 to 0;1 and undo that in the
823      * shader(it is free). This might potentially reduce precision. However, if the hardware does
824      * support proper floats it shouldn't, and if it doesn't we can't get anything better anyway
825      */
826     mat[0][0] = (mat[0][0] + 1.0) * 0.5;
827     mat[1][0] = (mat[1][0] + 1.0) * 0.5;
828     mat[0][1] = (mat[0][1] + 1.0) * 0.5;
829     mat[1][1] = (mat[1][1] + 1.0) * 0.5;
830     GL_EXTCALL(glSetFragmentShaderConstantATI(ATI_FFP_CONST_BUMPMAT(stage), (float *) mat));
831     checkGLcall("glSetFragmentShaderConstantATI(ATI_FFP_CONST_BUMPMAT(stage), mat)");
832
833     /* FIXME: This should go away
834      * This is currently needed because atifs borrows a pixel shader implementation
835      * from somewhere else, but consumes bump map matrix change events. The other pixel
836      * shader implementation may need notification about the change to update the texbem
837      * constants. Once ATIFS supports real shaders on its own, and GLSL/ARB have a replacement
838      * pipeline this call can go away
839      *
840      * FIXME2: Even considering this workaround calling FFPStateTable directly isn't nice
841      * as well. Better would be to call the model's table we inherit from, but currently
842      * it is always the FFP table, and as soon as this changes we can remove the call anyway
843      */
844     FFPStateTable[state].apply(state, stateblock, context);
845 }
846 #undef GLINFO_LOCATION
847
848 /* our state table. Borrows lots of stuff from the base implementation */
849 struct StateEntry ATIFSStateTable[STATE_HIGHEST + 1];
850
851 static void init_state_table() {
852     unsigned int i;
853     const DWORD rep = STATE_TEXTURESTAGE(0, WINED3DTSS_COLOROP);
854     memcpy(ATIFSStateTable, arb_program_shader_backend.StateTable, sizeof(ATIFSStateTable));
855
856     for(i = 0; i < MAX_TEXTURES; i++) {
857         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_COLOROP)].apply = set_tex_op_atifs;
858         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_COLOROP)].representative = rep;
859         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_COLORARG1)].apply = set_tex_op_atifs;
860         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_COLORARG1)].representative = rep;
861         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_COLORARG2)].apply = set_tex_op_atifs;
862         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_COLORARG2)].representative = rep;
863         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_COLORARG0)].apply = set_tex_op_atifs;
864         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_COLORARG0)].representative = rep;
865
866         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_ALPHAOP)].apply = set_tex_op_atifs;
867         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_ALPHAOP)].representative = rep;
868         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_ALPHAARG1)].apply = set_tex_op_atifs;
869         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_ALPHAARG1)].representative = rep;
870         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_ALPHAARG2)].apply = set_tex_op_atifs;
871         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_ALPHAARG2)].representative = rep;
872         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_ALPHAARG0)].apply = set_tex_op_atifs;
873         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_ALPHAARG0)].representative = rep;
874
875         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_RESULTARG)].apply = set_tex_op_atifs;
876         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_RESULTARG)].representative = rep;
877
878         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_BUMPENVMAT00)].apply = set_bumpmat;
879         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_BUMPENVMAT01)].apply = set_bumpmat;
880         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_BUMPENVMAT10)].apply = set_bumpmat;
881         ATIFSStateTable[STATE_TEXTURESTAGE(i, WINED3DTSS_BUMPENVMAT11)].apply = set_bumpmat;
882     }
883
884     ATIFSStateTable[STATE_RENDER(WINED3DRS_TEXTUREFACTOR)].apply = state_texfactor_atifs;
885     ATIFSStateTable[STATE_RENDER(WINED3DRS_TEXTUREFACTOR)].representative = STATE_RENDER(WINED3DRS_TEXTUREFACTOR);
886 }
887
/* GL_ATI_fragment_shader backend. It borrows a lot from the
 * ARB shader backend, currently the whole vertex processing
 * code. This code would also forward pixel shaders, but if
 * GL_ARB_fragment_program is supported, the atifs shader backend
 * is not used.
 */
894 static void shader_atifs_select(IWineD3DDevice *iface, BOOL usePS, BOOL useVS) {
895     arb_program_shader_backend.shader_select(iface, usePS, useVS);
896 }
897
898 static void shader_atifs_select_depth_blt(IWineD3DDevice *iface) {
899     arb_program_shader_backend.shader_select_depth_blt(iface);
900 }
901
902 static void shader_atifs_destroy_depth_blt(IWineD3DDevice *iface) {
903     arb_program_shader_backend.shader_destroy_depth_blt(iface);
904 }
905
906 static void shader_atifs_load_constants(IWineD3DDevice *iface, char usePS, char useVS) {
907     arb_program_shader_backend.shader_load_constants(iface, usePS, useVS);
908 }
909
910 static void shader_atifs_cleanup(IWineD3DDevice *iface) {
911     arb_program_shader_backend.shader_cleanup(iface);
912 }
913
914 static void shader_atifs_color_correction(SHADER_OPCODE_ARG* arg) {
915     arb_program_shader_backend.shader_color_correction(arg);
916 }
917
918 static void shader_atifs_destroy(IWineD3DBaseShader *iface) {
919     arb_program_shader_backend.shader_destroy(iface);
920 }
921
922 static HRESULT shader_atifs_alloc(IWineD3DDevice *iface) {
923     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *) iface;
924     HRESULT hr;
925     struct atifs_private_data *priv;
926     hr = arb_program_shader_backend.shader_alloc_private(iface);
927     if(FAILED(hr)) return hr;
928
929     This->shader_priv = HeapReAlloc(GetProcessHeap(), 0, This->shader_priv,
930                                     sizeof(struct atifs_private_data));
931     priv = (struct atifs_private_data *) This->shader_priv;
932     list_init(&priv->fragment_shaders);
933     return WINED3D_OK;
934 }
935
936 #define GLINFO_LOCATION This->adapter->gl_info
937 static void shader_atifs_free(IWineD3DDevice *iface) {
938     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *) iface;
939     struct atifs_private_data *priv = (struct atifs_private_data *) This->shader_priv;
940     struct ffp_desc *entry, *entry2;
941     struct atifs_ffp_desc *entry_ati;
942
943     ENTER_GL();
944     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &priv->fragment_shaders, struct ffp_desc, entry) {
945         entry_ati = (struct atifs_ffp_desc *) entry;
946         GL_EXTCALL(glDeleteFragmentShaderATI(entry_ati->shader));
947         checkGLcall("glDeleteFragmentShaderATI(entry->shader)");
948         list_remove(&entry->entry);
949         HeapFree(GetProcessHeap(), 0, entry);
950     }
951     LEAVE_GL();
952
953     /* Not actually needed, but revert what we've done before */
954     This->shader_priv = HeapReAlloc(GetProcessHeap(), 0, This->shader_priv,
955                                     sizeof(struct shader_arb_priv));
956     arb_program_shader_backend.shader_free_private(iface);
957 }
958 #undef GLINFO_LOCATION
959
960 static BOOL shader_atifs_dirty_const(IWineD3DDevice *iface) {
961     return arb_program_shader_backend.shader_dirtifyable_constants(iface);
962 }
963
964 static void shader_atifs_load_init(void) {
965     arb_program_shader_backend.shader_dll_load_init();
966     init_state_table();
967 }
968
969 static void shader_atifs_get_caps(WINED3DDEVTYPE devtype, WineD3D_GL_Info *gl_info, struct shader_caps *caps) {
970     arb_program_shader_backend.shader_get_caps(devtype, gl_info, caps);
971
972     caps->TextureOpCaps =  WINED3DTEXOPCAPS_DISABLE                     |
973                            WINED3DTEXOPCAPS_SELECTARG1                  |
974                            WINED3DTEXOPCAPS_SELECTARG2                  |
975                            WINED3DTEXOPCAPS_MODULATE4X                  |
976                            WINED3DTEXOPCAPS_MODULATE2X                  |
977                            WINED3DTEXOPCAPS_MODULATE                    |
978                            WINED3DTEXOPCAPS_ADDSIGNED2X                 |
979                            WINED3DTEXOPCAPS_ADDSIGNED                   |
980                            WINED3DTEXOPCAPS_ADD                         |
981                            WINED3DTEXOPCAPS_SUBTRACT                    |
982                            WINED3DTEXOPCAPS_ADDSMOOTH                   |
983                            WINED3DTEXOPCAPS_BLENDCURRENTALPHA           |
984                            WINED3DTEXOPCAPS_BLENDFACTORALPHA            |
985                            WINED3DTEXOPCAPS_BLENDTEXTUREALPHA           |
986                            WINED3DTEXOPCAPS_BLENDDIFFUSEALPHA           |
987                            WINED3DTEXOPCAPS_BLENDTEXTUREALPHAPM         |
988                            WINED3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR      |
989                            WINED3DTEXOPCAPS_MODULATECOLOR_ADDALPHA      |
990                            WINED3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA   |
991                            WINED3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR   |
992                            WINED3DTEXOPCAPS_DOTPRODUCT3                 |
993                            WINED3DTEXOPCAPS_MULTIPLYADD                 |
994                            WINED3DTEXOPCAPS_LERP                        |
995                            WINED3DTEXOPCAPS_BUMPENVMAP;
996
997     /* TODO: Implement WINED3DTEXOPCAPS_BUMPENVMAPLUMINANCE
998     and WINED3DTEXOPCAPS_PREMODULATE */
999
1000     /* GL_ATI_fragment_shader only supports up to 6 textures, which was the limit on r200 cards
1001      * which this extension is exclusively focused on(later cards have GL_ARB_fragment_program).
1002      * If the current card has more than 8 fixed function textures in OpenGL's regular fixed
1003      * function pipeline then the ATI_fragment_shader backend imposes a stricter limit. This
1004      * shouldn't be too hard since Nvidia cards have a limit of 4 textures with the default ffp
1005      * pipeline, and almost all games are happy with that. We can however support up to 8
1006      * texture stages because we have a 2nd pass limit of 8 instructions, and per stage we use
1007      * only 1 instruction.
1008      *
1009      * The proper fix for this is not to use GL_ATI_fragment_shader on cards newer than the
1010      * r200 series and use an ARB or GLSL shader instead
1011      */
1012     if(caps->MaxSimultaneousTextures > 6) {
1013         WARN("OpenGL fixed function supports %d simultaneous textures,\n", caps->MaxSimultaneousTextures);
1014         WARN("but GL_ATI_fragment_shader limits this to 6\n");
1015         caps->MaxSimultaneousTextures = 6;
1016     }
1017
1018     caps->PrimitiveMiscCaps |= WINED3DPMISCCAPS_TSSARGTEMP;
1019 }
1020
1021 static void shader_atifs_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) {
1022     ERR("Should not get here\n");
1023 }
1024
1025 static void shader_atifs_generate_vshader(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer) {
1026     arb_program_shader_backend.shader_generate_vshader(iface, buffer);
1027 }
1028
1029 const shader_backend_t atifs_shader_backend = {
1030     shader_atifs_select,
1031     shader_atifs_select_depth_blt,
1032     shader_atifs_destroy_depth_blt,
1033     shader_atifs_load_constants,
1034     shader_atifs_cleanup,
1035     shader_atifs_color_correction,
1036     shader_atifs_destroy,
1037     shader_atifs_alloc,
1038     shader_atifs_free,
1039     shader_atifs_dirty_const,
1040     shader_atifs_generate_pshader,
1041     shader_atifs_generate_vshader,
1042     shader_atifs_get_caps,
1043     shader_atifs_load_init,
1044     ATIFSStateTable
1045 };