d3dcompiler: Implement ID3D11ShaderReflectionConstantBuffer::GetDesc().
[wine] / dlls / d3dcompiler_43 / bytecodewriter.c
1 /*
2  * Direct3D bytecode output functions
3  *
4  * Copyright 2008 Stefan Dösinger
5  * Copyright 2009 Matteo Bruni
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20  *
21  */
22
23 #include "config.h"
24 #include "wine/port.h"
25 #include "wine/debug.h"
26
27 #include "d3d9types.h"
28 #include "d3dcompiler_private.h"
29
30 WINE_DEFAULT_DEBUG_CHANNEL(asmshader);
31
32 /****************************************************************
33  * General assembler shader construction helper routines follow *
34  ****************************************************************/
35 /* struct instruction *alloc_instr
36  *
37  * Allocates a new instruction structure with srcs registers
38  *
39  * Parameters:
40  *  srcs: Number of source registers to allocate
41  *
42  * Returns:
43  *  A pointer to the allocated instruction structure
44  *  NULL in case of an allocation failure
45  */
46 struct instruction *alloc_instr(unsigned int srcs) {
47     struct instruction *ret = asm_alloc(sizeof(*ret));
48     if(!ret) {
49         ERR("Failed to allocate memory for an instruction structure\n");
50         return NULL;
51     }
52
53     if(srcs) {
54         ret->src = asm_alloc(srcs * sizeof(*ret->src));
55         if(!ret->src) {
56             ERR("Failed to allocate memory for instruction registers\n");
57             asm_free(ret);
58             return NULL;
59         }
60         ret->num_srcs = srcs;
61     }
62     return ret;
63 }
64
65 /* void add_instruction
66  *
67  * Adds a new instruction to the shader's instructions array and grows the instruction array
68  * if needed.
69  *
70  * The function does NOT copy the instruction structure. Make sure not to release the
71  * instruction or any of its substructures like registers.
72  *
73  * Parameters:
74  *  shader: Shader to add the instruction to
75  *  instr: Instruction to add to the shader
76  */
77 BOOL add_instruction(struct bwriter_shader *shader, struct instruction *instr) {
78     struct instruction      **new_instructions;
79
80     if(!shader) return FALSE;
81
82     if(shader->instr_alloc_size == 0) {
83         shader->instr = asm_alloc(sizeof(*shader->instr) * INSTRARRAY_INITIAL_SIZE);
84         if(!shader->instr) {
85             ERR("Failed to allocate the shader instruction array\n");
86             return FALSE;
87         }
88         shader->instr_alloc_size = INSTRARRAY_INITIAL_SIZE;
89     } else if(shader->instr_alloc_size == shader->num_instrs) {
90         new_instructions = asm_realloc(shader->instr,
91                                        sizeof(*shader->instr) * (shader->instr_alloc_size) * 2);
92         if(!new_instructions) {
93             ERR("Failed to grow the shader instruction array\n");
94             return FALSE;
95         }
96         shader->instr = new_instructions;
97         shader->instr_alloc_size = shader->instr_alloc_size * 2;
98     } else if(shader->num_instrs > shader->instr_alloc_size) {
99         ERR("More instructions than allocated. This should not happen\n");
100         return FALSE;
101     }
102
103     shader->instr[shader->num_instrs] = instr;
104     shader->num_instrs++;
105     return TRUE;
106 }
107
108 BOOL add_constF(struct bwriter_shader *shader, DWORD reg, float x, float y, float z, float w) {
109     struct constant *newconst;
110
111     if(shader->num_cf) {
112         struct constant **newarray;
113         newarray = asm_realloc(shader->constF,
114                                sizeof(*shader->constF) * (shader->num_cf + 1));
115         if(!newarray) {
116             ERR("Failed to grow the constants array\n");
117             return FALSE;
118         }
119         shader->constF = newarray;
120     } else {
121         shader->constF = asm_alloc(sizeof(*shader->constF));
122         if(!shader->constF) {
123             ERR("Failed to allocate the constants array\n");
124             return FALSE;
125         }
126     }
127
128     newconst = asm_alloc(sizeof(*newconst));
129     if(!newconst) {
130         ERR("Failed to allocate a new constant\n");
131         return FALSE;
132     }
133     newconst->regnum = reg;
134     newconst->value[0].f = x;
135     newconst->value[1].f = y;
136     newconst->value[2].f = z;
137     newconst->value[3].f = w;
138     shader->constF[shader->num_cf] = newconst;
139
140     shader->num_cf++;
141     return TRUE;
142 }
143
144 BOOL add_constI(struct bwriter_shader *shader, DWORD reg, INT x, INT y, INT z, INT w) {
145     struct constant *newconst;
146
147     if(shader->num_ci) {
148         struct constant **newarray;
149         newarray = asm_realloc(shader->constI,
150                                sizeof(*shader->constI) * (shader->num_ci + 1));
151         if(!newarray) {
152             ERR("Failed to grow the constants array\n");
153             return FALSE;
154         }
155         shader->constI = newarray;
156     } else {
157         shader->constI = asm_alloc(sizeof(*shader->constI));
158         if(!shader->constI) {
159             ERR("Failed to allocate the constants array\n");
160             return FALSE;
161         }
162     }
163
164     newconst = asm_alloc(sizeof(*newconst));
165     if(!newconst) {
166         ERR("Failed to allocate a new constant\n");
167         return FALSE;
168     }
169     newconst->regnum = reg;
170     newconst->value[0].i = x;
171     newconst->value[1].i = y;
172     newconst->value[2].i = z;
173     newconst->value[3].i = w;
174     shader->constI[shader->num_ci] = newconst;
175
176     shader->num_ci++;
177     return TRUE;
178 }
179
180 BOOL add_constB(struct bwriter_shader *shader, DWORD reg, BOOL x) {
181     struct constant *newconst;
182
183     if(shader->num_cb) {
184         struct constant **newarray;
185         newarray = asm_realloc(shader->constB,
186                                sizeof(*shader->constB) * (shader->num_cb + 1));
187         if(!newarray) {
188             ERR("Failed to grow the constants array\n");
189             return FALSE;
190         }
191         shader->constB = newarray;
192     } else {
193         shader->constB = asm_alloc(sizeof(*shader->constB));
194         if(!shader->constB) {
195             ERR("Failed to allocate the constants array\n");
196             return FALSE;
197         }
198     }
199
200     newconst = asm_alloc(sizeof(*newconst));
201     if(!newconst) {
202         ERR("Failed to allocate a new constant\n");
203         return FALSE;
204     }
205     newconst->regnum = reg;
206     newconst->value[0].b = x;
207     shader->constB[shader->num_cb] = newconst;
208
209     shader->num_cb++;
210     return TRUE;
211 }
212
213 BOOL record_declaration(struct bwriter_shader *shader, DWORD usage,
214                         DWORD usage_idx, DWORD mod, BOOL output,
215                         DWORD regnum, DWORD writemask, BOOL builtin) {
216     unsigned int *num;
217     struct declaration **decl;
218     unsigned int i;
219
220     if(!shader) return FALSE;
221
222     if(output) {
223         num = &shader->num_outputs;
224         decl = &shader->outputs;
225     } else {
226         num = &shader->num_inputs;
227         decl = &shader->inputs;
228     }
229
230     if(*num == 0) {
231         *decl = asm_alloc(sizeof(**decl));
232         if(!*decl) {
233             ERR("Error allocating declarations array\n");
234             return FALSE;
235         }
236     } else {
237         struct declaration *newdecl;
238         for(i = 0; i < *num; i++) {
239             if((*decl)[i].regnum == regnum && ((*decl)[i].writemask & writemask)) {
240                 WARN("Declaration of register %u already exists, writemask match 0x%x\n",
241                       regnum, (*decl)[i].writemask & writemask);
242             }
243         }
244
245         newdecl = asm_realloc(*decl,
246                               sizeof(**decl) * ((*num) + 1));
247         if(!newdecl) {
248             ERR("Error reallocating declarations array\n");
249             return FALSE;
250         }
251         *decl = newdecl;
252     }
253     (*decl)[*num].usage = usage;
254     (*decl)[*num].usage_idx = usage_idx;
255     (*decl)[*num].regnum = regnum;
256     (*decl)[*num].mod = mod;
257     (*decl)[*num].writemask = writemask;
258     (*decl)[*num].builtin = builtin;
259     (*num)++;
260
261     return TRUE;
262 }
263
264 BOOL record_sampler(struct bwriter_shader *shader, DWORD samptype, DWORD mod, DWORD regnum) {
265     unsigned int i;
266
267     if(!shader) return FALSE;
268
269     if(shader->num_samplers == 0) {
270         shader->samplers = asm_alloc(sizeof(*shader->samplers));
271         if(!shader->samplers) {
272             ERR("Error allocating samplers array\n");
273             return FALSE;
274         }
275     } else {
276         struct samplerdecl *newarray;
277
278         for(i = 0; i < shader->num_samplers; i++) {
279             if(shader->samplers[i].regnum == regnum) {
280                 WARN("Sampler %u already declared\n", regnum);
281                 /* This is not an error as far as the assembler is concerned.
282                  * Direct3D might refuse to load the compiled shader though
283                  */
284             }
285         }
286
287         newarray = asm_realloc(shader->samplers,
288                                sizeof(*shader->samplers) * (shader->num_samplers + 1));
289         if(!newarray) {
290             ERR("Error reallocating samplers array\n");
291             return FALSE;
292         }
293         shader->samplers = newarray;
294     }
295
296     shader->samplers[shader->num_samplers].type = samptype;
297     shader->samplers[shader->num_samplers].mod = mod;
298     shader->samplers[shader->num_samplers].regnum = regnum;
299     shader->num_samplers++;
300     return TRUE;
301 }
302
303
304 /* shader bytecode buffer manipulation functions.
305  * allocate_buffer creates a new buffer structure, put_dword adds a new
306  * DWORD to the buffer. In the rare case of a memory allocation failure
307  * when trying to grow the buffer a flag is set in the buffer to mark it
308  * invalid. This avoids return value checking and passing in many places
309  */
310 static struct bytecode_buffer *allocate_buffer(void) {
311     struct bytecode_buffer *ret;
312
313     ret = asm_alloc(sizeof(*ret));
314     if(!ret) return NULL;
315
316     ret->alloc_size = BYTECODEBUFFER_INITIAL_SIZE;
317     ret->data = asm_alloc(sizeof(DWORD) * ret->alloc_size);
318     if(!ret->data) {
319         asm_free(ret);
320         return NULL;
321     }
322     ret->state = S_OK;
323     return ret;
324 }
325
326 static void put_dword(struct bytecode_buffer *buffer, DWORD value) {
327     if(FAILED(buffer->state)) return;
328
329     if(buffer->alloc_size == buffer->size) {
330         DWORD *newarray;
331         buffer->alloc_size *= 2;
332         newarray = asm_realloc(buffer->data,
333                                sizeof(DWORD) * buffer->alloc_size);
334         if(!newarray) {
335             ERR("Failed to grow the buffer data memory\n");
336             buffer->state = E_OUTOFMEMORY;
337             return;
338         }
339         buffer->data = newarray;
340     }
341     buffer->data[buffer->size++] = value;
342 }
343
344 /* bwriter -> d3d9 conversion functions. */
345 static DWORD d3d9_swizzle(DWORD bwriter_swizzle)
346 {
347     /* Currently a NOP, but this allows changing the internal definitions
348      * without side effects. */
349     DWORD ret = 0;
350
351     if ((bwriter_swizzle & BWRITERVS_X_X) == BWRITERVS_X_X) ret |= D3DVS_X_X;
352     if ((bwriter_swizzle & BWRITERVS_X_Y) == BWRITERVS_X_Y) ret |= D3DVS_X_Y;
353     if ((bwriter_swizzle & BWRITERVS_X_Z) == BWRITERVS_X_Z) ret |= D3DVS_X_Z;
354     if ((bwriter_swizzle & BWRITERVS_X_W) == BWRITERVS_X_W) ret |= D3DVS_X_W;
355
356     if ((bwriter_swizzle & BWRITERVS_Y_X) == BWRITERVS_Y_X) ret |= D3DVS_Y_X;
357     if ((bwriter_swizzle & BWRITERVS_Y_Y) == BWRITERVS_Y_Y) ret |= D3DVS_Y_Y;
358     if ((bwriter_swizzle & BWRITERVS_Y_Z) == BWRITERVS_Y_Z) ret |= D3DVS_Y_Z;
359     if ((bwriter_swizzle & BWRITERVS_Y_W) == BWRITERVS_Y_W) ret |= D3DVS_Y_W;
360
361     if ((bwriter_swizzle & BWRITERVS_Z_X) == BWRITERVS_Z_X) ret |= D3DVS_Z_X;
362     if ((bwriter_swizzle & BWRITERVS_Z_Y) == BWRITERVS_Z_Y) ret |= D3DVS_Z_Y;
363     if ((bwriter_swizzle & BWRITERVS_Z_Z) == BWRITERVS_Z_Z) ret |= D3DVS_Z_Z;
364     if ((bwriter_swizzle & BWRITERVS_Z_W) == BWRITERVS_Z_W) ret |= D3DVS_Z_W;
365
366     if ((bwriter_swizzle & BWRITERVS_W_X) == BWRITERVS_W_X) ret |= D3DVS_W_X;
367     if ((bwriter_swizzle & BWRITERVS_W_Y) == BWRITERVS_W_Y) ret |= D3DVS_W_Y;
368     if ((bwriter_swizzle & BWRITERVS_W_Z) == BWRITERVS_W_Z) ret |= D3DVS_W_Z;
369     if ((bwriter_swizzle & BWRITERVS_W_W) == BWRITERVS_W_W) ret |= D3DVS_W_W;
370
371     return ret;
372 }
373
374 static DWORD d3d9_writemask(DWORD bwriter_writemask)
375 {
376     DWORD ret = 0;
377
378     if (bwriter_writemask & BWRITERSP_WRITEMASK_0) ret |= D3DSP_WRITEMASK_0;
379     if (bwriter_writemask & BWRITERSP_WRITEMASK_1) ret |= D3DSP_WRITEMASK_1;
380     if (bwriter_writemask & BWRITERSP_WRITEMASK_2) ret |= D3DSP_WRITEMASK_2;
381     if (bwriter_writemask & BWRITERSP_WRITEMASK_3) ret |= D3DSP_WRITEMASK_3;
382
383     return ret;
384 }
385
386 static DWORD d3d9_srcmod(DWORD bwriter_srcmod)
387 {
388     switch (bwriter_srcmod)
389     {
390         case BWRITERSPSM_NONE:       return D3DSPSM_NONE;
391         case BWRITERSPSM_NEG:        return D3DSPSM_NEG;
392         case BWRITERSPSM_BIAS:       return D3DSPSM_BIAS;
393         case BWRITERSPSM_BIASNEG:    return D3DSPSM_BIASNEG;
394         case BWRITERSPSM_SIGN:       return D3DSPSM_SIGN;
395         case BWRITERSPSM_SIGNNEG:    return D3DSPSM_SIGNNEG;
396         case BWRITERSPSM_COMP:       return D3DSPSM_COMP;
397         case BWRITERSPSM_X2:         return D3DSPSM_X2;
398         case BWRITERSPSM_X2NEG:      return D3DSPSM_X2NEG;
399         case BWRITERSPSM_DZ:         return D3DSPSM_DZ;
400         case BWRITERSPSM_DW:         return D3DSPSM_DW;
401         case BWRITERSPSM_ABS:        return D3DSPSM_ABS;
402         case BWRITERSPSM_ABSNEG:     return D3DSPSM_ABSNEG;
403         case BWRITERSPSM_NOT:        return D3DSPSM_NOT;
404         default:
405             FIXME("Unhandled BWRITERSPSM token %#x.\n", bwriter_srcmod);
406             return 0;
407     }
408 }
409
410 static DWORD d3d9_dstmod(DWORD bwriter_mod)
411 {
412     DWORD ret = 0;
413
414     if (bwriter_mod & BWRITERSPDM_SATURATE)         ret |= D3DSPDM_SATURATE;
415     if (bwriter_mod & BWRITERSPDM_PARTIALPRECISION) ret |= D3DSPDM_PARTIALPRECISION;
416     if (bwriter_mod & BWRITERSPDM_MSAMPCENTROID)    ret |= D3DSPDM_MSAMPCENTROID;
417
418     return ret;
419 }
420
421 static DWORD d3d9_comparetype(DWORD asmshader_comparetype)
422 {
423     switch (asmshader_comparetype)
424     {
425         case BWRITER_COMPARISON_GT:     return D3DSPC_GT;
426         case BWRITER_COMPARISON_EQ:     return D3DSPC_EQ;
427         case BWRITER_COMPARISON_GE:     return D3DSPC_GE;
428         case BWRITER_COMPARISON_LT:     return D3DSPC_LT;
429         case BWRITER_COMPARISON_NE:     return D3DSPC_NE;
430         case BWRITER_COMPARISON_LE:     return D3DSPC_LE;
431         default:
432             FIXME("Unexpected BWRITER_COMPARISON type %#x.\n", asmshader_comparetype);
433             return 0;
434     }
435 }
436
437 static DWORD d3d9_sampler(DWORD bwriter_sampler)
438 {
439     if (bwriter_sampler == BWRITERSTT_UNKNOWN)  return D3DSTT_UNKNOWN;
440     if (bwriter_sampler == BWRITERSTT_1D)       return D3DSTT_1D;
441     if (bwriter_sampler == BWRITERSTT_2D)       return D3DSTT_2D;
442     if (bwriter_sampler == BWRITERSTT_CUBE)     return D3DSTT_CUBE;
443     if (bwriter_sampler == BWRITERSTT_VOLUME)   return D3DSTT_VOLUME;
444     FIXME("Unexpected BWRITERSAMPLER_TEXTURE_TYPE type %#x.\n", bwriter_sampler);
445
446     return 0;
447 }
448
449 static DWORD d3d9_register(DWORD bwriter_register)
450 {
451     if (bwriter_register == BWRITERSPR_TEMP)        return D3DSPR_TEMP;
452     if (bwriter_register == BWRITERSPR_INPUT)       return D3DSPR_INPUT;
453     if (bwriter_register == BWRITERSPR_CONST)       return D3DSPR_CONST;
454     if (bwriter_register == BWRITERSPR_ADDR)        return D3DSPR_ADDR;
455     if (bwriter_register == BWRITERSPR_TEXTURE)     return D3DSPR_TEXTURE;
456     if (bwriter_register == BWRITERSPR_RASTOUT)     return D3DSPR_RASTOUT;
457     if (bwriter_register == BWRITERSPR_ATTROUT)     return D3DSPR_ATTROUT;
458     if (bwriter_register == BWRITERSPR_TEXCRDOUT)   return D3DSPR_TEXCRDOUT;
459     if (bwriter_register == BWRITERSPR_OUTPUT)      return D3DSPR_OUTPUT;
460     if (bwriter_register == BWRITERSPR_CONSTINT)    return D3DSPR_CONSTINT;
461     if (bwriter_register == BWRITERSPR_COLOROUT)    return D3DSPR_COLOROUT;
462     if (bwriter_register == BWRITERSPR_DEPTHOUT)    return D3DSPR_DEPTHOUT;
463     if (bwriter_register == BWRITERSPR_SAMPLER)     return D3DSPR_SAMPLER;
464     if (bwriter_register == BWRITERSPR_CONSTBOOL)   return D3DSPR_CONSTBOOL;
465     if (bwriter_register == BWRITERSPR_LOOP)        return D3DSPR_LOOP;
466     if (bwriter_register == BWRITERSPR_MISCTYPE)    return D3DSPR_MISCTYPE;
467     if (bwriter_register == BWRITERSPR_LABEL)       return D3DSPR_LABEL;
468     if (bwriter_register == BWRITERSPR_PREDICATE)   return D3DSPR_PREDICATE;
469
470     FIXME("Unexpected BWRITERSPR %#x.\n", bwriter_register);
471     return ~0U;
472 }
473
474 static DWORD d3d9_opcode(DWORD bwriter_opcode)
475 {
476     switch (bwriter_opcode)
477     {
478         case BWRITERSIO_NOP:         return D3DSIO_NOP;
479         case BWRITERSIO_MOV:         return D3DSIO_MOV;
480         case BWRITERSIO_ADD:         return D3DSIO_ADD;
481         case BWRITERSIO_SUB:         return D3DSIO_SUB;
482         case BWRITERSIO_MAD:         return D3DSIO_MAD;
483         case BWRITERSIO_MUL:         return D3DSIO_MUL;
484         case BWRITERSIO_RCP:         return D3DSIO_RCP;
485         case BWRITERSIO_RSQ:         return D3DSIO_RSQ;
486         case BWRITERSIO_DP3:         return D3DSIO_DP3;
487         case BWRITERSIO_DP4:         return D3DSIO_DP4;
488         case BWRITERSIO_MIN:         return D3DSIO_MIN;
489         case BWRITERSIO_MAX:         return D3DSIO_MAX;
490         case BWRITERSIO_SLT:         return D3DSIO_SLT;
491         case BWRITERSIO_SGE:         return D3DSIO_SGE;
492         case BWRITERSIO_EXP:         return D3DSIO_EXP;
493         case BWRITERSIO_LOG:         return D3DSIO_LOG;
494         case BWRITERSIO_LIT:         return D3DSIO_LIT;
495         case BWRITERSIO_DST:         return D3DSIO_DST;
496         case BWRITERSIO_LRP:         return D3DSIO_LRP;
497         case BWRITERSIO_FRC:         return D3DSIO_FRC;
498         case BWRITERSIO_M4x4:        return D3DSIO_M4x4;
499         case BWRITERSIO_M4x3:        return D3DSIO_M4x3;
500         case BWRITERSIO_M3x4:        return D3DSIO_M3x4;
501         case BWRITERSIO_M3x3:        return D3DSIO_M3x3;
502         case BWRITERSIO_M3x2:        return D3DSIO_M3x2;
503         case BWRITERSIO_CALL:        return D3DSIO_CALL;
504         case BWRITERSIO_CALLNZ:      return D3DSIO_CALLNZ;
505         case BWRITERSIO_LOOP:        return D3DSIO_LOOP;
506         case BWRITERSIO_RET:         return D3DSIO_RET;
507         case BWRITERSIO_ENDLOOP:     return D3DSIO_ENDLOOP;
508         case BWRITERSIO_LABEL:       return D3DSIO_LABEL;
509         case BWRITERSIO_DCL:         return D3DSIO_DCL;
510         case BWRITERSIO_POW:         return D3DSIO_POW;
511         case BWRITERSIO_CRS:         return D3DSIO_CRS;
512         case BWRITERSIO_SGN:         return D3DSIO_SGN;
513         case BWRITERSIO_ABS:         return D3DSIO_ABS;
514         case BWRITERSIO_NRM:         return D3DSIO_NRM;
515         case BWRITERSIO_SINCOS:      return D3DSIO_SINCOS;
516         case BWRITERSIO_REP:         return D3DSIO_REP;
517         case BWRITERSIO_ENDREP:      return D3DSIO_ENDREP;
518         case BWRITERSIO_IF:          return D3DSIO_IF;
519         case BWRITERSIO_IFC:         return D3DSIO_IFC;
520         case BWRITERSIO_ELSE:        return D3DSIO_ELSE;
521         case BWRITERSIO_ENDIF:       return D3DSIO_ENDIF;
522         case BWRITERSIO_BREAK:       return D3DSIO_BREAK;
523         case BWRITERSIO_BREAKC:      return D3DSIO_BREAKC;
524         case BWRITERSIO_MOVA:        return D3DSIO_MOVA;
525         case BWRITERSIO_DEFB:        return D3DSIO_DEFB;
526         case BWRITERSIO_DEFI:        return D3DSIO_DEFI;
527
528         case BWRITERSIO_TEXCOORD:    return D3DSIO_TEXCOORD;
529         case BWRITERSIO_TEXKILL:     return D3DSIO_TEXKILL;
530         case BWRITERSIO_TEX:         return D3DSIO_TEX;
531         case BWRITERSIO_TEXBEM:      return D3DSIO_TEXBEM;
532         case BWRITERSIO_TEXBEML:     return D3DSIO_TEXBEML;
533         case BWRITERSIO_TEXREG2AR:   return D3DSIO_TEXREG2AR;
534         case BWRITERSIO_TEXREG2GB:   return D3DSIO_TEXREG2GB;
535         case BWRITERSIO_TEXM3x2PAD:  return D3DSIO_TEXM3x2PAD;
536         case BWRITERSIO_TEXM3x2TEX:  return D3DSIO_TEXM3x2TEX;
537         case BWRITERSIO_TEXM3x3PAD:  return D3DSIO_TEXM3x3PAD;
538         case BWRITERSIO_TEXM3x3TEX:  return D3DSIO_TEXM3x3TEX;
539         case BWRITERSIO_TEXM3x3SPEC: return D3DSIO_TEXM3x3SPEC;
540         case BWRITERSIO_TEXM3x3VSPEC:return D3DSIO_TEXM3x3VSPEC;
541         case BWRITERSIO_EXPP:        return D3DSIO_EXPP;
542         case BWRITERSIO_LOGP:        return D3DSIO_LOGP;
543         case BWRITERSIO_CND:         return D3DSIO_CND;
544         case BWRITERSIO_DEF:         return D3DSIO_DEF;
545         case BWRITERSIO_TEXREG2RGB:  return D3DSIO_TEXREG2RGB;
546         case BWRITERSIO_TEXDP3TEX:   return D3DSIO_TEXDP3TEX;
547         case BWRITERSIO_TEXM3x2DEPTH:return D3DSIO_TEXM3x2DEPTH;
548         case BWRITERSIO_TEXDP3:      return D3DSIO_TEXDP3;
549         case BWRITERSIO_TEXM3x3:     return D3DSIO_TEXM3x3;
550         case BWRITERSIO_TEXDEPTH:    return D3DSIO_TEXDEPTH;
551         case BWRITERSIO_CMP:         return D3DSIO_CMP;
552         case BWRITERSIO_BEM:         return D3DSIO_BEM;
553         case BWRITERSIO_DP2ADD:      return D3DSIO_DP2ADD;
554         case BWRITERSIO_DSX:         return D3DSIO_DSX;
555         case BWRITERSIO_DSY:         return D3DSIO_DSY;
556         case BWRITERSIO_TEXLDD:      return D3DSIO_TEXLDD;
557         case BWRITERSIO_SETP:        return D3DSIO_SETP;
558         case BWRITERSIO_TEXLDL:      return D3DSIO_TEXLDL;
559         case BWRITERSIO_BREAKP:      return D3DSIO_BREAKP;
560
561         case BWRITERSIO_PHASE:       return D3DSIO_PHASE;
562         case BWRITERSIO_COMMENT:     return D3DSIO_COMMENT;
563         case BWRITERSIO_END:         return D3DSIO_END;
564
565         case BWRITERSIO_TEXLDP:      return D3DSIO_TEX | D3DSI_TEXLD_PROJECT;
566         case BWRITERSIO_TEXLDB:      return D3DSIO_TEX | D3DSI_TEXLD_BIAS;
567
568         default:
569             FIXME("Unhandled BWRITERSIO token %#x.\n", bwriter_opcode);
570             return ~0U;
571     }
572 }
573
574 /******************************************************
575  * Implementation of the writer functions starts here *
576  ******************************************************/
577 static void write_declarations(struct bc_writer *This,
578                                struct bytecode_buffer *buffer, BOOL len,
579                                const struct declaration *decls, unsigned int num, DWORD type) {
580     DWORD i;
581     DWORD instr_dcl = D3DSIO_DCL;
582     DWORD token;
583     struct shader_reg reg;
584
585     ZeroMemory(&reg, sizeof(reg));
586
587     if(len) {
588         instr_dcl |= 2 << D3DSI_INSTLENGTH_SHIFT;
589     }
590
591     for(i = 0; i < num; i++) {
592         if(decls[i].builtin) continue;
593
594         /* Write the DCL instruction */
595         put_dword(buffer, instr_dcl);
596
597         /* Write the usage and index */
598         token = (1 << 31); /* Bit 31 of non-instruction opcodes is 1 */
599         token |= (decls[i].usage << D3DSP_DCL_USAGE_SHIFT) & D3DSP_DCL_USAGE_MASK;
600         token |= (decls[i].usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT) & D3DSP_DCL_USAGEINDEX_MASK;
601         put_dword(buffer, token);
602
603         /* Write the dest register */
604         reg.type = type;
605         reg.regnum = decls[i].regnum;
606         reg.u.writemask = decls[i].writemask;
607         This->funcs->dstreg(This, &reg, buffer, 0, decls[i].mod);
608     }
609 }
610
611 static void write_const(struct constant **consts, int num, DWORD opcode, DWORD reg_type, struct bytecode_buffer *buffer, BOOL len) {
612     DWORD i;
613     DWORD instr_def = opcode;
614     const DWORD reg = (1<<31) |
615                       ((reg_type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
616                       ((reg_type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
617                       D3DSP_WRITEMASK_ALL;
618
619     if(len) {
620         if(opcode == D3DSIO_DEFB)
621             instr_def |= 2 << D3DSI_INSTLENGTH_SHIFT;
622         else
623             instr_def |= 5 << D3DSI_INSTLENGTH_SHIFT;
624     }
625
626     for(i = 0; i < num; i++) {
627         /* Write the DEF instruction */
628         put_dword(buffer, instr_def);
629
630         put_dword(buffer, reg | (consts[i]->regnum & D3DSP_REGNUM_MASK));
631         put_dword(buffer, consts[i]->value[0].d);
632         if(opcode != D3DSIO_DEFB) {
633             put_dword(buffer, consts[i]->value[1].d);
634             put_dword(buffer, consts[i]->value[2].d);
635             put_dword(buffer, consts[i]->value[3].d);
636         }
637     }
638 }
639
640 static void write_constF(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
641     write_const(shader->constF, shader->num_cf, D3DSIO_DEF, D3DSPR_CONST, buffer, len);
642 }
643
644 /* This function looks for VS 1/2 registers mapping to VS 3 output registers */
645 static HRESULT vs_find_builtin_varyings(struct bc_writer *This, const struct bwriter_shader *shader) {
646     DWORD i;
647     DWORD usage, usage_idx, writemask, regnum;
648
649     for(i = 0; i < shader->num_outputs; i++) {
650         if(!shader->outputs[i].builtin) continue;
651
652         usage = shader->outputs[i].usage;
653         usage_idx = shader->outputs[i].usage_idx;
654         writemask = shader->outputs[i].writemask;
655         regnum = shader->outputs[i].regnum;
656
657         switch(usage) {
658             case BWRITERDECLUSAGE_POSITION:
659             case BWRITERDECLUSAGE_POSITIONT:
660                 if(usage_idx > 0) {
661                     WARN("dcl_position%u not supported in sm 1/2 shaders\n", usage_idx);
662                     return E_INVALIDARG;
663                 }
664                 TRACE("o%u is oPos\n", regnum);
665                 This->oPos_regnum = regnum;
666                 break;
667
668             case BWRITERDECLUSAGE_COLOR:
669                 if(usage_idx > 1) {
670                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
671                     return E_INVALIDARG;
672                 }
673                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
674                     WARN("Only WRITEMASK_ALL is supported on color in sm 1/2\n");
675                     return E_INVALIDARG;
676                 }
677                 TRACE("o%u is oD%u\n", regnum, usage_idx);
678                 This->oD_regnum[usage_idx] = regnum;
679                 break;
680
681             case BWRITERDECLUSAGE_TEXCOORD:
682                 if(usage_idx >= 8) {
683                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
684                     return E_INVALIDARG;
685                 }
686                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
687                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
688                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
689                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
690                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
691                     return E_INVALIDARG;
692                 }
693                 TRACE("o%u is oT%u\n", regnum, usage_idx);
694                 This->oT_regnum[usage_idx] = regnum;
695                 break;
696
697             case BWRITERDECLUSAGE_PSIZE:
698                 if(usage_idx > 0) {
699                     WARN("dcl_psize%u not supported in sm 1/2 shaders\n", usage_idx);
700                     return E_INVALIDARG;
701                 }
702                 TRACE("o%u writemask 0x%08x is oPts\n", regnum, writemask);
703                 This->oPts_regnum = regnum;
704                 This->oPts_mask = writemask;
705                 break;
706
707             case BWRITERDECLUSAGE_FOG:
708                 if(usage_idx > 0) {
709                     WARN("dcl_fog%u not supported in sm 1 shaders\n", usage_idx);
710                     return E_INVALIDARG;
711                 }
712                 if(writemask != BWRITERSP_WRITEMASK_0 && writemask != BWRITERSP_WRITEMASK_1 &&
713                    writemask != BWRITERSP_WRITEMASK_2 && writemask != BWRITERSP_WRITEMASK_3) {
714                     WARN("Unsupported fog writemask\n");
715                     return E_INVALIDARG;
716                 }
717                 TRACE("o%u writemask 0x%08x is oFog\n", regnum, writemask);
718                 This->oFog_regnum = regnum;
719                 This->oFog_mask = writemask;
720                 break;
721
722             default:
723                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
724                 return E_INVALIDARG;
725         }
726     }
727
728     return S_OK;
729 }
730
731 static void vs_1_x_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
732     HRESULT hr;
733
734     if(shader->num_ci || shader->num_cb) {
735         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
736         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
737         This->state = E_INVALIDARG;
738         return;
739     }
740
741     hr = vs_find_builtin_varyings(This, shader);
742     if(FAILED(hr)) {
743         This->state = hr;
744         return;
745     }
746
747     /* Declare the shader type and version */
748     put_dword(buffer, This->version);
749
750     write_declarations(This, buffer, FALSE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
751     write_constF(shader, buffer, FALSE);
752     return;
753 }
754
755 static HRESULT find_ps_builtin_semantics(struct bc_writer *This,
756                                          const struct bwriter_shader *shader,
757                                          DWORD texcoords) {
758     DWORD i;
759     DWORD usage, usage_idx, writemask, regnum;
760
761     This->v_regnum[0] = -1; This->v_regnum[1] = -1;
762     for(i = 0; i < 8; i++) This->t_regnum[i] = -1;
763
764     for(i = 0; i < shader->num_inputs; i++) {
765         if(!shader->inputs[i].builtin) continue;
766
767         usage = shader->inputs[i].usage;
768         usage_idx = shader->inputs[i].usage_idx;
769         writemask = shader->inputs[i].writemask;
770         regnum = shader->inputs[i].regnum;
771
772         switch(usage) {
773             case BWRITERDECLUSAGE_COLOR:
774                 if(usage_idx > 1) {
775                     WARN("dcl_color%u not supported in sm 1 shaders\n", usage_idx);
776                     return E_INVALIDARG;
777                 }
778                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
779                     WARN("Only WRITEMASK_ALL is supported on color in sm 1\n");
780                     return E_INVALIDARG;
781                 }
782                 TRACE("v%u is v%u\n", regnum, usage_idx);
783                 This->v_regnum[usage_idx] = regnum;
784                 break;
785
786             case BWRITERDECLUSAGE_TEXCOORD:
787                 if(usage_idx > texcoords) {
788                     WARN("dcl_texcoord%u not supported in this shader version\n", usage_idx);
789                     return E_INVALIDARG;
790                 }
791                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
792                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
793                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
794                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
795                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
796                 } else {
797                     writemask = BWRITERSP_WRITEMASK_ALL;
798                 }
799                 TRACE("v%u is t%u\n", regnum, usage_idx);
800                 This->t_regnum[usage_idx] = regnum;
801                 break;
802
803             default:
804                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
805                 return E_INVALIDARG;
806         }
807     }
808
809     return S_OK;
810 }
811
812 static void ps_1_x_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
813     HRESULT hr;
814
815     /* First check the constants and varyings, and complain if unsupported things are used */
816     if(shader->num_ci || shader->num_cb) {
817         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
818         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
819         This->state = E_INVALIDARG;
820         return;
821     }
822
823     hr = find_ps_builtin_semantics(This, shader, 4);
824     if(FAILED(hr)) {
825         This->state = hr;
826         return;
827     }
828
829     /* Declare the shader type and version */
830     put_dword(buffer, This->version);
831     write_constF(shader, buffer, FALSE);
832 }
833
834 static void ps_1_4_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
835     HRESULT hr;
836
837     /* First check the constants and varyings, and complain if unsupported things are used */
838     if(shader->num_ci || shader->num_cb) {
839         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
840         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
841         This->state = E_INVALIDARG;
842         return;
843     }
844     hr = find_ps_builtin_semantics(This, shader, 6);
845     if(FAILED(hr)) {
846         This->state = hr;
847         return;
848     }
849
850     /* Declare the shader type and version */
851     put_dword(buffer, This->version);
852     write_constF(shader, buffer, FALSE);
853 }
854
855 static void end(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
856     put_dword(buffer, D3DSIO_END);
857 }
858
859 static DWORD map_vs_output(struct bc_writer *This, DWORD regnum, DWORD mask, DWORD *has_components) {
860     DWORD token = 0;
861     DWORD i;
862
863     *has_components = TRUE;
864     if(regnum == This->oPos_regnum) {
865         token |= (D3DSPR_RASTOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
866         token |= D3DSRO_POSITION & D3DSP_REGNUM_MASK; /* No shift */
867         return token;
868     }
869     if(regnum == This->oFog_regnum && mask == This->oFog_mask) {
870         token |= (D3DSPR_RASTOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
871         token |= D3DSRO_FOG & D3DSP_REGNUM_MASK; /* No shift */
872         token |= D3DSP_WRITEMASK_ALL;
873         *has_components = FALSE;
874         return token;
875     }
876     if(regnum == This->oPts_regnum && mask == This->oPts_mask) {
877         token |= (D3DSPR_RASTOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
878         token |= D3DSRO_POINT_SIZE & D3DSP_REGNUM_MASK; /* No shift */
879         token |= D3DSP_WRITEMASK_ALL;
880         *has_components = FALSE;
881         return token;
882     }
883     for(i = 0; i < 2; i++) {
884         if(regnum == This->oD_regnum[i]) {
885             token |= (D3DSPR_ATTROUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
886             token |= i & D3DSP_REGNUM_MASK; /* No shift */
887             return token;
888         }
889     }
890     for(i = 0; i < 8; i++) {
891         if(regnum == This->oT_regnum[i]) {
892             token |= (D3DSPR_TEXCRDOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
893             token |= i & D3DSP_REGNUM_MASK; /* No shift */
894             return token;
895         }
896     }
897
898     /* The varying must be undeclared - if an unsupported varying was declared,
899      * the vs_find_builtin_varyings function would have caught it and this code
900      * would not run */
901     WARN("Undeclared varying %u\n", regnum);
902     This->state = E_INVALIDARG;
903     return -1;
904 }
905
906 static void vs_12_dstreg(struct bc_writer *This, const struct shader_reg *reg,
907                          struct bytecode_buffer *buffer,
908                          DWORD shift, DWORD mod) {
909     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
910     DWORD has_wmask;
911
912     if(reg->rel_reg) {
913         WARN("Relative addressing not supported for destination registers\n");
914         This->state = E_INVALIDARG;
915         return;
916     }
917
918     switch(reg->type) {
919         case BWRITERSPR_OUTPUT:
920             token |= map_vs_output(This, reg->regnum, reg->u.writemask, &has_wmask);
921             break;
922
923         case BWRITERSPR_RASTOUT:
924         case BWRITERSPR_ATTROUT:
925             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
926             * but are unexpected. If we hit this path it might be due to an error.
927             */
928             FIXME("Unexpected register type %u\n", reg->type);
929             /* drop through */
930         case BWRITERSPR_INPUT:
931         case BWRITERSPR_TEMP:
932         case BWRITERSPR_CONST:
933             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
934             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
935             has_wmask = TRUE;
936             break;
937
938         case BWRITERSPR_ADDR:
939             if(reg->regnum != 0) {
940                 WARN("Only a0 exists\n");
941                 This->state = E_INVALIDARG;
942                 return;
943             }
944             token |= (D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
945             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
946             has_wmask = TRUE;
947             break;
948
949         case BWRITERSPR_PREDICATE:
950             if(This->version != BWRITERVS_VERSION(2, 1)){
951                 WARN("Predicate register is allowed only in vs_2_x\n");
952                 This->state = E_INVALIDARG;
953                 return;
954             }
955             if(reg->regnum != 0) {
956                 WARN("Only predicate register p0 exists\n");
957                 This->state = E_INVALIDARG;
958                 return;
959             }
960             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
961             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
962             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
963             has_wmask = TRUE;
964             break;
965
966         default:
967             WARN("Invalid register type for 1.x-2.x vertex shader\n");
968             This->state = E_INVALIDARG;
969             return;
970     }
971
972     /* strictly speaking there are no modifiers in vs_2_0 and vs_1_x, but they can be written
973      * into the bytecode and since the compiler doesn't do such checks write them
974      * (the checks are done by the undocumented shader validator)
975      */
976     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
977     token |= d3d9_dstmod(mod);
978
979     if(has_wmask) {
980         token |= d3d9_writemask(reg->u.writemask);
981     }
982     put_dword(buffer, token);
983 }
984
985 static void vs_1_x_srcreg(struct bc_writer *This, const struct shader_reg *reg,
986                           struct bytecode_buffer *buffer) {
987     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
988     DWORD has_swizzle;
989     DWORD component;
990
991     switch(reg->type) {
992         case BWRITERSPR_OUTPUT:
993             /* Map the swizzle to a writemask, the format expected
994                by map_vs_output
995              */
996             switch(reg->u.swizzle) {
997                 case BWRITERVS_SWIZZLE_X:
998                     component = BWRITERSP_WRITEMASK_0;
999                     break;
1000                 case BWRITERVS_SWIZZLE_Y:
1001                     component = BWRITERSP_WRITEMASK_1;
1002                     break;
1003                 case BWRITERVS_SWIZZLE_Z:
1004                     component = BWRITERSP_WRITEMASK_2;
1005                     break;
1006                 case BWRITERVS_SWIZZLE_W:
1007                     component = BWRITERSP_WRITEMASK_3;
1008                     break;
1009                 default:
1010                     component = 0;
1011             }
1012             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
1013             break;
1014
1015         case BWRITERSPR_RASTOUT:
1016         case BWRITERSPR_ATTROUT:
1017             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
1018              * but are unexpected. If we hit this path it might be due to an error.
1019              */
1020             FIXME("Unexpected register type %u\n", reg->type);
1021             /* drop through */
1022         case BWRITERSPR_INPUT:
1023         case BWRITERSPR_TEMP:
1024         case BWRITERSPR_CONST:
1025         case BWRITERSPR_ADDR:
1026             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1027             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1028             if(reg->rel_reg) {
1029                 if(reg->rel_reg->type != BWRITERSPR_ADDR ||
1030                    reg->rel_reg->regnum != 0 ||
1031                    reg->rel_reg->u.swizzle != BWRITERVS_SWIZZLE_X) {
1032                     WARN("Relative addressing in vs_1_x is only allowed with a0.x\n");
1033                     This->state = E_INVALIDARG;
1034                     return;
1035                 }
1036                 token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1037             }
1038             break;
1039
1040         default:
1041             WARN("Invalid register type for 1.x vshader\n");
1042             This->state = E_INVALIDARG;
1043             return;
1044     }
1045
1046     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1047
1048     token |= d3d9_srcmod(reg->srcmod);
1049     put_dword(buffer, token);
1050 }
1051
1052 static void write_srcregs(struct bc_writer *This, const struct instruction *instr,
1053                           struct bytecode_buffer *buffer){
1054     unsigned int i;
1055     if(instr->has_predicate){
1056         This->funcs->srcreg(This, &instr->predicate, buffer);
1057     }
1058     for(i = 0; i < instr->num_srcs; i++){
1059         This->funcs->srcreg(This, &instr->src[i], buffer);
1060     }
1061 }
1062
1063 static DWORD map_ps13_temp(struct bc_writer *This, const struct shader_reg *reg) {
1064     DWORD token = 0;
1065     if(reg->regnum == T0_REG) {
1066         token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1067         token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
1068     } else if(reg->regnum == T1_REG) {
1069         token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1070         token |= 1 & D3DSP_REGNUM_MASK; /* No shift */
1071     } else if(reg->regnum == T2_REG) {
1072         token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1073         token |= 2 & D3DSP_REGNUM_MASK; /* No shift */
1074     } else if(reg->regnum == T3_REG) {
1075         token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1076         token |= 3 & D3DSP_REGNUM_MASK; /* No shift */
1077     } else {
1078         token |= (D3DSPR_TEMP << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1079         token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1080     }
1081     return token;
1082 }
1083
1084 static DWORD map_ps_input(struct bc_writer *This,
1085                           const struct shader_reg *reg) {
1086     DWORD i, token = 0;
1087     /* Map color interpolators */
1088     for(i = 0; i < 2; i++) {
1089         if(reg->regnum == This->v_regnum[i]) {
1090             token |= (D3DSPR_INPUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1091             token |= i & D3DSP_REGNUM_MASK; /* No shift */
1092             return token;
1093         }
1094     }
1095     for(i = 0; i < 8; i++) {
1096         if(reg->regnum == This->t_regnum[i]) {
1097             token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1098             token |= i & D3DSP_REGNUM_MASK; /* No shift */
1099             return token;
1100         }
1101     }
1102
1103     WARN("Invalid ps 1/2 varying\n");
1104     This->state = E_INVALIDARG;
1105     return token;
1106 }
1107
1108 static void ps_1_0123_srcreg(struct bc_writer *This, const struct shader_reg *reg,
1109                              struct bytecode_buffer *buffer) {
1110     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1111     if(reg->rel_reg) {
1112         WARN("Relative addressing not supported in <= ps_3_0\n");
1113         This->state = E_INVALIDARG;
1114         return;
1115     }
1116
1117     switch(reg->type) {
1118         case BWRITERSPR_INPUT:
1119             token |= map_ps_input(This, reg);
1120             break;
1121
1122             /* Take care about the texture temporaries. There's a problem: They aren't
1123              * declared anywhere, so we can only hardcode the values that are used
1124              * to map ps_1_3 shaders to the common shader structure
1125              */
1126         case BWRITERSPR_TEMP:
1127             token |= map_ps13_temp(This, reg);
1128             break;
1129
1130         case BWRITERSPR_CONST: /* Can be mapped 1:1 */
1131             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1132             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1133             break;
1134
1135         default:
1136             WARN("Invalid register type for <= ps_1_3 shader\n");
1137             This->state = E_INVALIDARG;
1138             return;
1139     }
1140
1141     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1142
1143     if(reg->srcmod == BWRITERSPSM_DZ || reg->srcmod == BWRITERSPSM_DW ||
1144        reg->srcmod == BWRITERSPSM_ABS || reg->srcmod == BWRITERSPSM_ABSNEG ||
1145        reg->srcmod == BWRITERSPSM_NOT) {
1146         WARN("Invalid source modifier %u for <= ps_1_3\n", reg->srcmod);
1147         This->state = E_INVALIDARG;
1148         return;
1149     }
1150     token |= d3d9_srcmod(reg->srcmod);
1151     put_dword(buffer, token);
1152 }
1153
1154 static void ps_1_0123_dstreg(struct bc_writer *This, const struct shader_reg *reg,
1155                              struct bytecode_buffer *buffer,
1156                              DWORD shift, DWORD mod) {
1157     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1158
1159     if(reg->rel_reg) {
1160         WARN("Relative addressing not supported for destination registers\n");
1161         This->state = E_INVALIDARG;
1162         return;
1163     }
1164
1165     switch(reg->type) {
1166         case BWRITERSPR_TEMP:
1167             token |= map_ps13_temp(This, reg);
1168             break;
1169
1170         /* texkill uses the input register as a destination parameter */
1171         case BWRITERSPR_INPUT:
1172             token |= map_ps_input(This, reg);
1173             break;
1174
1175         default:
1176             WARN("Invalid dest register type for 1.x pshader\n");
1177             This->state = E_INVALIDARG;
1178             return;
1179     }
1180
1181     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1182     token |= d3d9_dstmod(mod);
1183
1184     token |= d3d9_writemask(reg->u.writemask);
1185     put_dword(buffer, token);
1186 }
1187
1188 /* The length of an instruction consists of the destination register (if any),
1189  * the number of source registers, the number of address registers used for
1190  * indirect addressing, and optionally the predicate register
1191  */
1192 static DWORD instrlen(const struct instruction *instr, unsigned int srcs, unsigned int dsts) {
1193     unsigned int i;
1194     DWORD ret = srcs + dsts + (instr->has_predicate ? 1 : 0);
1195
1196     if(dsts){
1197         if(instr->dst.rel_reg) ret++;
1198     }
1199     for(i = 0; i < srcs; i++) {
1200         if(instr->src[i].rel_reg) ret++;
1201     }
1202     return ret;
1203 }
1204
1205 static void sm_1_x_opcode(struct bc_writer *This,
1206                           const struct instruction *instr,
1207                           DWORD token, struct bytecode_buffer *buffer) {
1208     /* In sm_1_x instruction length isn't encoded */
1209     if(instr->coissue){
1210         token |= D3DSI_COISSUE;
1211     }
1212     put_dword(buffer, token);
1213 }
1214
1215 static void instr_handler(struct bc_writer *This,
1216                           const struct instruction *instr,
1217                           struct bytecode_buffer *buffer) {
1218     DWORD token = d3d9_opcode(instr->opcode);
1219
1220     This->funcs->opcode(This, instr, token, buffer);
1221     if(instr->has_dst) This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1222     write_srcregs(This, instr, buffer);
1223 }
1224
1225 static const struct instr_handler_table vs_1_x_handlers[] = {
1226     {BWRITERSIO_ADD,            instr_handler},
1227     {BWRITERSIO_NOP,            instr_handler},
1228     {BWRITERSIO_MOV,            instr_handler},
1229     {BWRITERSIO_SUB,            instr_handler},
1230     {BWRITERSIO_MAD,            instr_handler},
1231     {BWRITERSIO_MUL,            instr_handler},
1232     {BWRITERSIO_RCP,            instr_handler},
1233     {BWRITERSIO_RSQ,            instr_handler},
1234     {BWRITERSIO_DP3,            instr_handler},
1235     {BWRITERSIO_DP4,            instr_handler},
1236     {BWRITERSIO_MIN,            instr_handler},
1237     {BWRITERSIO_MAX,            instr_handler},
1238     {BWRITERSIO_SLT,            instr_handler},
1239     {BWRITERSIO_SGE,            instr_handler},
1240     {BWRITERSIO_EXP,            instr_handler},
1241     {BWRITERSIO_LOG,            instr_handler},
1242     {BWRITERSIO_EXPP,           instr_handler},
1243     {BWRITERSIO_LOGP,           instr_handler},
1244     {BWRITERSIO_DST,            instr_handler},
1245     {BWRITERSIO_FRC,            instr_handler},
1246     {BWRITERSIO_M4x4,           instr_handler},
1247     {BWRITERSIO_M4x3,           instr_handler},
1248     {BWRITERSIO_M3x4,           instr_handler},
1249     {BWRITERSIO_M3x3,           instr_handler},
1250     {BWRITERSIO_M3x2,           instr_handler},
1251     {BWRITERSIO_LIT,            instr_handler},
1252
1253     {BWRITERSIO_END,            NULL}, /* Sentinel value, it signals
1254                                           the end of the list */
1255 };
1256
1257 static const struct bytecode_backend vs_1_x_backend = {
1258     vs_1_x_header,
1259     end,
1260     vs_1_x_srcreg,
1261     vs_12_dstreg,
1262     sm_1_x_opcode,
1263     vs_1_x_handlers
1264 };
1265
1266 static void instr_ps_1_0123_texld(struct bc_writer *This,
1267                                   const struct instruction *instr,
1268                                   struct bytecode_buffer *buffer) {
1269     DWORD idx;
1270     struct shader_reg reg;
1271     DWORD swizzlemask;
1272
1273     if(instr->src[1].type != BWRITERSPR_SAMPLER ||
1274        instr->src[1].regnum > 3) {
1275         WARN("Unsupported sampler type %u regnum %u\n",
1276              instr->src[1].type, instr->src[1].regnum);
1277         This->state = E_INVALIDARG;
1278         return;
1279     } else if(instr->dst.type != BWRITERSPR_TEMP) {
1280         WARN("Can only sample into a temp register\n");
1281         This->state = E_INVALIDARG;
1282         return;
1283     }
1284
1285     idx = instr->src[1].regnum;
1286     if((idx == 0 && instr->dst.regnum != T0_REG) ||
1287        (idx == 1 && instr->dst.regnum != T1_REG) ||
1288        (idx == 2 && instr->dst.regnum != T2_REG) ||
1289        (idx == 3 && instr->dst.regnum != T3_REG)) {
1290         WARN("Sampling from sampler s%u to register r%u is not possible in ps_1_x\n",
1291              idx, instr->dst.regnum);
1292         This->state = E_INVALIDARG;
1293         return;
1294     }
1295     if(instr->src[0].type == BWRITERSPR_INPUT) {
1296         /* A simple non-dependent read tex instruction */
1297         if(instr->src[0].regnum != This->t_regnum[idx]) {
1298             WARN("Cannot sample from s%u with texture address data from interpolator %u\n",
1299                  idx, instr->src[0].regnum);
1300             This->state = E_INVALIDARG;
1301             return;
1302         }
1303         This->funcs->opcode(This, instr, D3DSIO_TEX & D3DSI_OPCODE_MASK, buffer);
1304
1305         /* map the temp dstreg to the ps_1_3 texture temporary register */
1306         This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1307     } else if(instr->src[0].type == BWRITERSPR_TEMP) {
1308
1309         swizzlemask = (3 << BWRITERVS_SWIZZLE_SHIFT) |
1310             (3 << (BWRITERVS_SWIZZLE_SHIFT + 2)) |
1311             (3 << (BWRITERVS_SWIZZLE_SHIFT + 4));
1312         if((instr->src[0].u.swizzle & swizzlemask) == (BWRITERVS_X_X | BWRITERVS_Y_Y | BWRITERVS_Z_Z)) {
1313             TRACE("writing texreg2rgb\n");
1314             This->funcs->opcode(This, instr, D3DSIO_TEXREG2RGB & D3DSI_OPCODE_MASK, buffer);
1315         } else if(instr->src[0].u.swizzle == (BWRITERVS_X_W | BWRITERVS_Y_X | BWRITERVS_Z_X | BWRITERVS_W_X)) {
1316             TRACE("writing texreg2ar\n");
1317             This->funcs->opcode(This, instr, D3DSIO_TEXREG2AR & D3DSI_OPCODE_MASK, buffer);
1318         } else if(instr->src[0].u.swizzle == (BWRITERVS_X_Y | BWRITERVS_Y_Z | BWRITERVS_Z_Z | BWRITERVS_W_Z)) {
1319             TRACE("writing texreg2gb\n");
1320             This->funcs->opcode(This, instr, D3DSIO_TEXREG2GB & D3DSI_OPCODE_MASK, buffer);
1321         } else {
1322             WARN("Unsupported src addr swizzle in dependent texld: 0x%08x\n", instr->src[0].u.swizzle);
1323             This->state = E_INVALIDARG;
1324             return;
1325         }
1326
1327         /* Dst and src reg can be mapped normally. Both registers are temporary registers in the
1328          * source shader and have to be mapped to the temporary form of the texture registers. However,
1329          * the src reg doesn't have a swizzle
1330          */
1331         This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1332         reg = instr->src[0];
1333         reg.u.swizzle = BWRITERVS_NOSWIZZLE;
1334         This->funcs->srcreg(This, &reg, buffer);
1335     } else {
1336         WARN("Invalid address data source register\n");
1337         This->state = E_INVALIDARG;
1338         return;
1339     }
1340 }
1341
1342 static void instr_ps_1_0123_mov(struct bc_writer *This,
1343                                 const struct instruction *instr,
1344                                 struct bytecode_buffer *buffer) {
1345     DWORD token = D3DSIO_MOV & D3DSI_OPCODE_MASK;
1346
1347     if(instr->dst.type == BWRITERSPR_TEMP && instr->src[0].type == BWRITERSPR_INPUT) {
1348         if((instr->dst.regnum == T0_REG && instr->src[0].regnum == This->t_regnum[0]) ||
1349            (instr->dst.regnum == T1_REG && instr->src[0].regnum == This->t_regnum[1]) ||
1350            (instr->dst.regnum == T2_REG && instr->src[0].regnum == This->t_regnum[2]) ||
1351            (instr->dst.regnum == T3_REG && instr->src[0].regnum == This->t_regnum[3])) {
1352             if(instr->dstmod & BWRITERSPDM_SATURATE) {
1353                 This->funcs->opcode(This, instr, D3DSIO_TEXCOORD & D3DSI_OPCODE_MASK, buffer);
1354                 /* Remove the SATURATE flag, it's implicit to the instruction */
1355                 This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod & (~BWRITERSPDM_SATURATE));
1356                 return;
1357             } else {
1358                 WARN("A varying -> temp copy is only supported with the SATURATE modifier in <=ps_1_3\n");
1359                 This->state = E_INVALIDARG;
1360                 return;
1361             }
1362         } else if(instr->src[0].regnum == This->v_regnum[0] ||
1363                   instr->src[0].regnum == This->v_regnum[1]) {
1364             /* Handled by the normal mov below. Just drop out of the if condition */
1365         } else {
1366             WARN("Unsupported varying -> temp mov in <= ps_1_3\n");
1367             This->state = E_INVALIDARG;
1368             return;
1369         }
1370     }
1371
1372     This->funcs->opcode(This, instr, token, buffer);
1373     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1374     This->funcs->srcreg(This, &instr->src[0], buffer);
1375 }
1376
1377 static const struct instr_handler_table ps_1_0123_handlers[] = {
1378     {BWRITERSIO_ADD,            instr_handler},
1379     {BWRITERSIO_NOP,            instr_handler},
1380     {BWRITERSIO_MOV,            instr_ps_1_0123_mov},
1381     {BWRITERSIO_SUB,            instr_handler},
1382     {BWRITERSIO_MAD,            instr_handler},
1383     {BWRITERSIO_MUL,            instr_handler},
1384     {BWRITERSIO_DP3,            instr_handler},
1385     {BWRITERSIO_DP4,            instr_handler},
1386     {BWRITERSIO_LRP,            instr_handler},
1387
1388     /* pshader instructions */
1389     {BWRITERSIO_CND,            instr_handler},
1390     {BWRITERSIO_CMP,            instr_handler},
1391     {BWRITERSIO_TEXKILL,        instr_handler},
1392     {BWRITERSIO_TEX,            instr_ps_1_0123_texld},
1393     {BWRITERSIO_TEXBEM,         instr_handler},
1394     {BWRITERSIO_TEXBEML,        instr_handler},
1395     {BWRITERSIO_TEXM3x2PAD,     instr_handler},
1396     {BWRITERSIO_TEXM3x3PAD,     instr_handler},
1397     {BWRITERSIO_TEXM3x3SPEC,    instr_handler},
1398     {BWRITERSIO_TEXM3x3VSPEC,   instr_handler},
1399     {BWRITERSIO_TEXM3x3TEX,     instr_handler},
1400     {BWRITERSIO_TEXM3x3,        instr_handler},
1401     {BWRITERSIO_TEXM3x2DEPTH,   instr_handler},
1402     {BWRITERSIO_TEXM3x2TEX,     instr_handler},
1403     {BWRITERSIO_TEXDP3,         instr_handler},
1404     {BWRITERSIO_TEXDP3TEX,      instr_handler},
1405     {BWRITERSIO_END,            NULL},
1406 };
1407
1408 static const struct bytecode_backend ps_1_0123_backend = {
1409     ps_1_x_header,
1410     end,
1411     ps_1_0123_srcreg,
1412     ps_1_0123_dstreg,
1413     sm_1_x_opcode,
1414     ps_1_0123_handlers
1415 };
1416
1417 static void ps_1_4_srcreg(struct bc_writer *This, const struct shader_reg *reg,
1418                           struct bytecode_buffer *buffer) {
1419     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1420     if(reg->rel_reg) {
1421         WARN("Relative addressing not supported in <= ps_3_0\n");
1422         This->state = E_INVALIDARG;
1423         return;
1424     }
1425
1426     switch(reg->type) {
1427         case BWRITERSPR_INPUT:
1428             token |= map_ps_input(This, reg);
1429             break;
1430
1431         /* Can be mapped 1:1 */
1432         case BWRITERSPR_TEMP:
1433         case BWRITERSPR_CONST:
1434             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1435             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1436             break;
1437
1438         default:
1439             WARN("Invalid register type for ps_1_4 shader\n");
1440             This->state = E_INVALIDARG;
1441             return;
1442     }
1443
1444     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1445
1446     if(reg->srcmod == BWRITERSPSM_ABS || reg->srcmod == BWRITERSPSM_ABSNEG ||
1447        reg->srcmod == BWRITERSPSM_NOT) {
1448         WARN("Invalid source modifier %u for ps_1_4\n", reg->srcmod);
1449         This->state = E_INVALIDARG;
1450         return;
1451     }
1452     token |= d3d9_srcmod(reg->srcmod);
1453     put_dword(buffer, token);
1454 }
1455
1456 static void ps_1_4_dstreg(struct bc_writer *This, const struct shader_reg *reg,
1457                           struct bytecode_buffer *buffer,
1458                           DWORD shift, DWORD mod) {
1459     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1460
1461     if(reg->rel_reg) {
1462         WARN("Relative addressing not supported for destination registers\n");
1463         This->state = E_INVALIDARG;
1464         return;
1465     }
1466
1467     switch(reg->type) {
1468         case BWRITERSPR_TEMP: /* 1:1 mapping */
1469             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1470             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1471             break;
1472
1473         /* For texkill */
1474         case BWRITERSPR_INPUT:
1475             token |= map_ps_input(This, reg);
1476             break;
1477
1478         default:
1479             WARN("Invalid dest register type for 1.x pshader\n");
1480             This->state = E_INVALIDARG;
1481             return;
1482     }
1483
1484     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1485     token |= d3d9_dstmod(mod);
1486
1487     token |= d3d9_writemask(reg->u.writemask);
1488     put_dword(buffer, token);
1489 }
1490
1491 static void instr_ps_1_4_mov(struct bc_writer *This,
1492                              const struct instruction *instr,
1493                              struct bytecode_buffer *buffer) {
1494     DWORD token = D3DSIO_MOV & D3DSI_OPCODE_MASK;
1495
1496     if(instr->dst.type == BWRITERSPR_TEMP && instr->src[0].type == BWRITERSPR_INPUT) {
1497         if(instr->src[0].regnum == This->t_regnum[0] ||
1498            instr->src[0].regnum == This->t_regnum[1] ||
1499            instr->src[0].regnum == This->t_regnum[2] ||
1500            instr->src[0].regnum == This->t_regnum[3] ||
1501            instr->src[0].regnum == This->t_regnum[4] ||
1502            instr->src[0].regnum == This->t_regnum[5]) {
1503             /* Similar to a regular mov, but a different opcode */
1504             token = D3DSIO_TEXCOORD & D3DSI_OPCODE_MASK;
1505         } else if(instr->src[0].regnum == This->v_regnum[0] ||
1506                   instr->src[0].regnum == This->v_regnum[1]) {
1507             /* Handled by the normal mov below. Just drop out of the if condition */
1508         } else {
1509             WARN("Unsupported varying -> temp mov in ps_1_4\n");
1510             This->state = E_INVALIDARG;
1511             return;
1512         }
1513     }
1514
1515     This->funcs->opcode(This, instr, token, buffer);
1516     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1517     This->funcs->srcreg(This, &instr->src[0], buffer);
1518 }
1519
1520 static void instr_ps_1_4_texld(struct bc_writer *This,
1521                                const struct instruction *instr,
1522                                struct bytecode_buffer *buffer) {
1523     if(instr->src[1].type != BWRITERSPR_SAMPLER ||
1524        instr->src[1].regnum > 5) {
1525         WARN("Unsupported sampler type %u regnum %u\n",
1526              instr->src[1].type, instr->src[1].regnum);
1527         This->state = E_INVALIDARG;
1528         return;
1529     } else if(instr->dst.type != BWRITERSPR_TEMP) {
1530         WARN("Can only sample into a temp register\n");
1531         This->state = E_INVALIDARG;
1532         return;
1533     }
1534
1535     if(instr->src[1].regnum != instr->dst.regnum) {
1536         WARN("Sampling from sampler s%u to register r%u is not possible in ps_1_4\n",
1537              instr->src[1].regnum, instr->dst.regnum);
1538         This->state = E_INVALIDARG;
1539         return;
1540     }
1541
1542     This->funcs->opcode(This, instr, D3DSIO_TEX & D3DSI_OPCODE_MASK, buffer);
1543     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1544     This->funcs->srcreg(This, &instr->src[0], buffer);
1545 }
1546
1547 static const struct instr_handler_table ps_1_4_handlers[] = {
1548     {BWRITERSIO_ADD,            instr_handler},
1549     {BWRITERSIO_NOP,            instr_handler},
1550     {BWRITERSIO_MOV,            instr_ps_1_4_mov},
1551     {BWRITERSIO_SUB,            instr_handler},
1552     {BWRITERSIO_MAD,            instr_handler},
1553     {BWRITERSIO_MUL,            instr_handler},
1554     {BWRITERSIO_DP3,            instr_handler},
1555     {BWRITERSIO_DP4,            instr_handler},
1556     {BWRITERSIO_LRP,            instr_handler},
1557
1558     /* pshader instructions */
1559     {BWRITERSIO_CND,            instr_handler},
1560     {BWRITERSIO_CMP,            instr_handler},
1561     {BWRITERSIO_TEXKILL,        instr_handler},
1562     {BWRITERSIO_TEX,            instr_ps_1_4_texld},
1563     {BWRITERSIO_TEXDEPTH,       instr_handler},
1564     {BWRITERSIO_BEM,            instr_handler},
1565
1566     {BWRITERSIO_PHASE,          instr_handler},
1567     {BWRITERSIO_END,            NULL},
1568 };
1569
1570 static const struct bytecode_backend ps_1_4_backend = {
1571     ps_1_4_header,
1572     end,
1573     ps_1_4_srcreg,
1574     ps_1_4_dstreg,
1575     sm_1_x_opcode,
1576     ps_1_4_handlers
1577 };
1578
1579 static void write_constB(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
1580     write_const(shader->constB, shader->num_cb, D3DSIO_DEFB, D3DSPR_CONSTBOOL, buffer, len);
1581 }
1582
1583 static void write_constI(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
1584     write_const(shader->constI, shader->num_ci, D3DSIO_DEFI, D3DSPR_CONSTINT, buffer, len);
1585 }
1586
1587 static void vs_2_header(struct bc_writer *This,
1588                         const struct bwriter_shader *shader,
1589                         struct bytecode_buffer *buffer) {
1590     HRESULT hr;
1591
1592     hr = vs_find_builtin_varyings(This, shader);
1593     if(FAILED(hr)) {
1594         This->state = hr;
1595         return;
1596     }
1597
1598     /* Declare the shader type and version */
1599     put_dword(buffer, This->version);
1600
1601     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1602     write_constF(shader, buffer, TRUE);
1603     write_constB(shader, buffer, TRUE);
1604     write_constI(shader, buffer, TRUE);
1605     return;
1606 }
1607
1608 static void vs_2_srcreg(struct bc_writer *This,
1609                         const struct shader_reg *reg,
1610                         struct bytecode_buffer *buffer) {
1611     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1612     DWORD has_swizzle;
1613     DWORD component;
1614     DWORD d3d9reg;
1615
1616     switch(reg->type) {
1617         case BWRITERSPR_OUTPUT:
1618             /* Map the swizzle to a writemask, the format expected
1619                by map_vs_output
1620              */
1621             switch(reg->u.swizzle) {
1622                 case BWRITERVS_SWIZZLE_X:
1623                     component = BWRITERSP_WRITEMASK_0;
1624                     break;
1625                 case BWRITERVS_SWIZZLE_Y:
1626                     component = BWRITERSP_WRITEMASK_1;
1627                     break;
1628                 case BWRITERVS_SWIZZLE_Z:
1629                     component = BWRITERSP_WRITEMASK_2;
1630                     break;
1631                 case BWRITERVS_SWIZZLE_W:
1632                     component = BWRITERSP_WRITEMASK_3;
1633                     break;
1634                 default:
1635                     component = 0;
1636             }
1637             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
1638             break;
1639
1640         case BWRITERSPR_RASTOUT:
1641         case BWRITERSPR_ATTROUT:
1642             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
1643              * but are unexpected. If we hit this path it might be due to an error.
1644              */
1645             FIXME("Unexpected register type %u\n", reg->type);
1646             /* drop through */
1647         case BWRITERSPR_INPUT:
1648         case BWRITERSPR_TEMP:
1649         case BWRITERSPR_CONST:
1650         case BWRITERSPR_ADDR:
1651         case BWRITERSPR_CONSTINT:
1652         case BWRITERSPR_CONSTBOOL:
1653         case BWRITERSPR_LABEL:
1654             d3d9reg = d3d9_register(reg->type);
1655             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1656             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1657             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1658             break;
1659
1660         case BWRITERSPR_LOOP:
1661             if(reg->regnum != 0) {
1662                 WARN("Only regnum 0 is supported for the loop index register in vs_2_0\n");
1663                 This->state = E_INVALIDARG;
1664                 return;
1665             }
1666             token |= (D3DSPR_LOOP << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1667             token |= (D3DSPR_LOOP << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1668             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
1669             break;
1670
1671         case BWRITERSPR_PREDICATE:
1672             if(This->version != BWRITERVS_VERSION(2, 1)){
1673                 WARN("Predicate register is allowed only in vs_2_x\n");
1674                 This->state = E_INVALIDARG;
1675                 return;
1676             }
1677             if(reg->regnum > 0) {
1678                 WARN("Only predicate register 0 is supported\n");
1679                 This->state = E_INVALIDARG;
1680                 return;
1681             }
1682             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1683             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1684             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
1685
1686             break;
1687
1688         default:
1689             WARN("Invalid register type for 2.0 vshader\n");
1690             This->state = E_INVALIDARG;
1691             return;
1692     }
1693
1694     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1695
1696     token |= d3d9_srcmod(reg->srcmod);
1697
1698     if(reg->rel_reg)
1699         token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1700
1701     put_dword(buffer, token);
1702
1703     /* vs_2_0 and newer write the register containing the index explicitly in the
1704      * binary code
1705      */
1706     if(token & D3DVS_ADDRMODE_RELATIVE)
1707         vs_2_srcreg(This, reg->rel_reg, buffer);
1708 }
1709
1710 static void sm_2_opcode(struct bc_writer *This,
1711                         const struct instruction *instr,
1712                         DWORD token, struct bytecode_buffer *buffer) {
1713     /* From sm 2 onwards instruction length is encoded in the opcode field */
1714     int dsts = instr->has_dst ? 1 : 0;
1715     token |= instrlen(instr, instr->num_srcs, dsts) << D3DSI_INSTLENGTH_SHIFT;
1716     if(instr->comptype)
1717         token |= (d3d9_comparetype(instr->comptype) << 16) & (0xf << 16);
1718     if(instr->has_predicate)
1719         token |= D3DSHADER_INSTRUCTION_PREDICATED;
1720     put_dword(buffer,token);
1721 }
1722
1723 static const struct instr_handler_table vs_2_0_handlers[] = {
1724     {BWRITERSIO_ADD,            instr_handler},
1725     {BWRITERSIO_NOP,            instr_handler},
1726     {BWRITERSIO_MOV,            instr_handler},
1727     {BWRITERSIO_SUB,            instr_handler},
1728     {BWRITERSIO_MAD,            instr_handler},
1729     {BWRITERSIO_MUL,            instr_handler},
1730     {BWRITERSIO_RCP,            instr_handler},
1731     {BWRITERSIO_RSQ,            instr_handler},
1732     {BWRITERSIO_DP3,            instr_handler},
1733     {BWRITERSIO_DP4,            instr_handler},
1734     {BWRITERSIO_MIN,            instr_handler},
1735     {BWRITERSIO_MAX,            instr_handler},
1736     {BWRITERSIO_SLT,            instr_handler},
1737     {BWRITERSIO_SGE,            instr_handler},
1738     {BWRITERSIO_ABS,            instr_handler},
1739     {BWRITERSIO_EXP,            instr_handler},
1740     {BWRITERSIO_LOG,            instr_handler},
1741     {BWRITERSIO_EXPP,           instr_handler},
1742     {BWRITERSIO_LOGP,           instr_handler},
1743     {BWRITERSIO_DST,            instr_handler},
1744     {BWRITERSIO_LRP,            instr_handler},
1745     {BWRITERSIO_FRC,            instr_handler},
1746     {BWRITERSIO_CRS,            instr_handler},
1747     {BWRITERSIO_SGN,            instr_handler},
1748     {BWRITERSIO_NRM,            instr_handler},
1749     {BWRITERSIO_SINCOS,         instr_handler},
1750     {BWRITERSIO_M4x4,           instr_handler},
1751     {BWRITERSIO_M4x3,           instr_handler},
1752     {BWRITERSIO_M3x4,           instr_handler},
1753     {BWRITERSIO_M3x3,           instr_handler},
1754     {BWRITERSIO_M3x2,           instr_handler},
1755     {BWRITERSIO_LIT,            instr_handler},
1756     {BWRITERSIO_POW,            instr_handler},
1757     {BWRITERSIO_MOVA,           instr_handler},
1758
1759     {BWRITERSIO_CALL,           instr_handler},
1760     {BWRITERSIO_CALLNZ,         instr_handler},
1761     {BWRITERSIO_REP,            instr_handler},
1762     {BWRITERSIO_ENDREP,         instr_handler},
1763     {BWRITERSIO_IF,             instr_handler},
1764     {BWRITERSIO_LABEL,          instr_handler},
1765     {BWRITERSIO_ELSE,           instr_handler},
1766     {BWRITERSIO_ENDIF,          instr_handler},
1767     {BWRITERSIO_LOOP,           instr_handler},
1768     {BWRITERSIO_RET,            instr_handler},
1769     {BWRITERSIO_ENDLOOP,        instr_handler},
1770
1771     {BWRITERSIO_END,            NULL},
1772 };
1773
1774 static const struct bytecode_backend vs_2_0_backend = {
1775     vs_2_header,
1776     end,
1777     vs_2_srcreg,
1778     vs_12_dstreg,
1779     sm_2_opcode,
1780     vs_2_0_handlers
1781 };
1782
1783 static const struct instr_handler_table vs_2_x_handlers[] = {
1784     {BWRITERSIO_ADD,            instr_handler},
1785     {BWRITERSIO_NOP,            instr_handler},
1786     {BWRITERSIO_MOV,            instr_handler},
1787     {BWRITERSIO_SUB,            instr_handler},
1788     {BWRITERSIO_MAD,            instr_handler},
1789     {BWRITERSIO_MUL,            instr_handler},
1790     {BWRITERSIO_RCP,            instr_handler},
1791     {BWRITERSIO_RSQ,            instr_handler},
1792     {BWRITERSIO_DP3,            instr_handler},
1793     {BWRITERSIO_DP4,            instr_handler},
1794     {BWRITERSIO_MIN,            instr_handler},
1795     {BWRITERSIO_MAX,            instr_handler},
1796     {BWRITERSIO_SLT,            instr_handler},
1797     {BWRITERSIO_SGE,            instr_handler},
1798     {BWRITERSIO_ABS,            instr_handler},
1799     {BWRITERSIO_EXP,            instr_handler},
1800     {BWRITERSIO_LOG,            instr_handler},
1801     {BWRITERSIO_EXPP,           instr_handler},
1802     {BWRITERSIO_LOGP,           instr_handler},
1803     {BWRITERSIO_DST,            instr_handler},
1804     {BWRITERSIO_LRP,            instr_handler},
1805     {BWRITERSIO_FRC,            instr_handler},
1806     {BWRITERSIO_CRS,            instr_handler},
1807     {BWRITERSIO_SGN,            instr_handler},
1808     {BWRITERSIO_NRM,            instr_handler},
1809     {BWRITERSIO_SINCOS,         instr_handler},
1810     {BWRITERSIO_M4x4,           instr_handler},
1811     {BWRITERSIO_M4x3,           instr_handler},
1812     {BWRITERSIO_M3x4,           instr_handler},
1813     {BWRITERSIO_M3x3,           instr_handler},
1814     {BWRITERSIO_M3x2,           instr_handler},
1815     {BWRITERSIO_LIT,            instr_handler},
1816     {BWRITERSIO_POW,            instr_handler},
1817     {BWRITERSIO_MOVA,           instr_handler},
1818
1819     {BWRITERSIO_CALL,           instr_handler},
1820     {BWRITERSIO_CALLNZ,         instr_handler},
1821     {BWRITERSIO_REP,            instr_handler},
1822     {BWRITERSIO_ENDREP,         instr_handler},
1823     {BWRITERSIO_IF,             instr_handler},
1824     {BWRITERSIO_LABEL,          instr_handler},
1825     {BWRITERSIO_IFC,            instr_handler},
1826     {BWRITERSIO_ELSE,           instr_handler},
1827     {BWRITERSIO_ENDIF,          instr_handler},
1828     {BWRITERSIO_BREAK,          instr_handler},
1829     {BWRITERSIO_BREAKC,         instr_handler},
1830     {BWRITERSIO_LOOP,           instr_handler},
1831     {BWRITERSIO_RET,            instr_handler},
1832     {BWRITERSIO_ENDLOOP,        instr_handler},
1833
1834     {BWRITERSIO_SETP,           instr_handler},
1835     {BWRITERSIO_BREAKP,         instr_handler},
1836
1837     {BWRITERSIO_END,            NULL},
1838 };
1839
1840 static const struct bytecode_backend vs_2_x_backend = {
1841     vs_2_header,
1842     end,
1843     vs_2_srcreg,
1844     vs_12_dstreg,
1845     sm_2_opcode,
1846     vs_2_x_handlers
1847 };
1848
1849 static void write_samplers(const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1850     DWORD i;
1851     DWORD instr_dcl = D3DSIO_DCL | (2 << D3DSI_INSTLENGTH_SHIFT);
1852     DWORD token;
1853     const DWORD reg = (1<<31) |
1854         ((D3DSPR_SAMPLER << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
1855         ((D3DSPR_SAMPLER << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
1856         D3DSP_WRITEMASK_ALL;
1857
1858     for(i = 0; i < shader->num_samplers; i++) {
1859         /* Write the DCL instruction */
1860         put_dword(buffer, instr_dcl);
1861         token = (1<<31);
1862         /* Already shifted */
1863         token |= (d3d9_sampler(shader->samplers[i].type)) & D3DSP_TEXTURETYPE_MASK;
1864         put_dword(buffer, token);
1865         token = reg | (shader->samplers[i].regnum & D3DSP_REGNUM_MASK);
1866         token |= d3d9_dstmod(shader->samplers[i].mod);
1867         put_dword(buffer, token);
1868     }
1869 }
1870
1871 static void ps_2_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1872     HRESULT hr = find_ps_builtin_semantics(This, shader, 8);
1873     if(FAILED(hr)) {
1874         This->state = hr;
1875         return;
1876     }
1877
1878     /* Declare the shader type and version */
1879     put_dword(buffer, This->version);
1880     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1881     write_samplers(shader, buffer);
1882     write_constF(shader, buffer, TRUE);
1883     write_constB(shader, buffer, TRUE);
1884     write_constI(shader, buffer, TRUE);
1885 }
1886
1887 static void ps_2_srcreg(struct bc_writer *This,
1888                         const struct shader_reg *reg,
1889                         struct bytecode_buffer *buffer) {
1890     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1891     DWORD d3d9reg;
1892     if(reg->rel_reg) {
1893         WARN("Relative addressing not supported in <= ps_3_0\n");
1894         This->state = E_INVALIDARG;
1895         return;
1896     }
1897
1898     switch(reg->type) {
1899         case BWRITERSPR_INPUT:
1900             token |= map_ps_input(This, reg);
1901             break;
1902
1903             /* Can be mapped 1:1 */
1904         case BWRITERSPR_TEMP:
1905         case BWRITERSPR_CONST:
1906         case BWRITERSPR_COLOROUT:
1907         case BWRITERSPR_CONSTBOOL:
1908         case BWRITERSPR_CONSTINT:
1909         case BWRITERSPR_SAMPLER:
1910         case BWRITERSPR_LABEL:
1911         case BWRITERSPR_DEPTHOUT:
1912             d3d9reg = d3d9_register(reg->type);
1913             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1914             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1915             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1916             break;
1917
1918         case BWRITERSPR_PREDICATE:
1919             if(This->version != BWRITERPS_VERSION(2, 1)){
1920                 WARN("Predicate register not supported in ps_2_0\n");
1921                 This->state = E_INVALIDARG;
1922             }
1923             if(reg->regnum) {
1924                 WARN("Predicate register with regnum %u not supported\n",
1925                      reg->regnum);
1926                 This->state = E_INVALIDARG;
1927             }
1928             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1929             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1930             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
1931             break;
1932
1933         default:
1934             WARN("Invalid register type for ps_2_0 shader\n");
1935             This->state = E_INVALIDARG;
1936             return;
1937     }
1938
1939     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1940
1941     token |= d3d9_srcmod(reg->srcmod);
1942     put_dword(buffer, token);
1943 }
1944
1945 static void ps_2_0_dstreg(struct bc_writer *This,
1946                           const struct shader_reg *reg,
1947                           struct bytecode_buffer *buffer,
1948                           DWORD shift, DWORD mod) {
1949     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1950     DWORD d3d9reg;
1951
1952     if(reg->rel_reg) {
1953         WARN("Relative addressing not supported for destination registers\n");
1954         This->state = E_INVALIDARG;
1955         return;
1956     }
1957
1958     switch(reg->type) {
1959         case BWRITERSPR_TEMP: /* 1:1 mapping */
1960         case BWRITERSPR_COLOROUT:
1961         case BWRITERSPR_DEPTHOUT:
1962             d3d9reg = d3d9_register(reg->type);
1963             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1964             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1965             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1966             break;
1967
1968         case BWRITERSPR_PREDICATE:
1969             if(This->version != BWRITERPS_VERSION(2, 1)){
1970                 WARN("Predicate register not supported in ps_2_0\n");
1971                 This->state = E_INVALIDARG;
1972             }
1973             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1974             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1975             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1976             break;
1977
1978         /* texkill uses the input register as a destination parameter */
1979         case BWRITERSPR_INPUT:
1980             token |= map_ps_input(This, reg);
1981             break;
1982
1983         default:
1984             WARN("Invalid dest register type for 2.x pshader\n");
1985             This->state = E_INVALIDARG;
1986             return;
1987     }
1988
1989     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1990     token |= d3d9_dstmod(mod);
1991
1992     token |= d3d9_writemask(reg->u.writemask);
1993     put_dword(buffer, token);
1994 }
1995
1996 static const struct instr_handler_table ps_2_0_handlers[] = {
1997     {BWRITERSIO_ADD,            instr_handler},
1998     {BWRITERSIO_NOP,            instr_handler},
1999     {BWRITERSIO_MOV,            instr_handler},
2000     {BWRITERSIO_SUB,            instr_handler},
2001     {BWRITERSIO_MAD,            instr_handler},
2002     {BWRITERSIO_MUL,            instr_handler},
2003     {BWRITERSIO_RCP,            instr_handler},
2004     {BWRITERSIO_RSQ,            instr_handler},
2005     {BWRITERSIO_DP3,            instr_handler},
2006     {BWRITERSIO_DP4,            instr_handler},
2007     {BWRITERSIO_MIN,            instr_handler},
2008     {BWRITERSIO_MAX,            instr_handler},
2009     {BWRITERSIO_ABS,            instr_handler},
2010     {BWRITERSIO_EXP,            instr_handler},
2011     {BWRITERSIO_LOG,            instr_handler},
2012     {BWRITERSIO_EXPP,           instr_handler},
2013     {BWRITERSIO_LOGP,           instr_handler},
2014     {BWRITERSIO_LRP,            instr_handler},
2015     {BWRITERSIO_FRC,            instr_handler},
2016     {BWRITERSIO_CRS,            instr_handler},
2017     {BWRITERSIO_NRM,            instr_handler},
2018     {BWRITERSIO_SINCOS,         instr_handler},
2019     {BWRITERSIO_M4x4,           instr_handler},
2020     {BWRITERSIO_M4x3,           instr_handler},
2021     {BWRITERSIO_M3x4,           instr_handler},
2022     {BWRITERSIO_M3x3,           instr_handler},
2023     {BWRITERSIO_M3x2,           instr_handler},
2024     {BWRITERSIO_POW,            instr_handler},
2025     {BWRITERSIO_DP2ADD,         instr_handler},
2026     {BWRITERSIO_CMP,            instr_handler},
2027
2028     {BWRITERSIO_TEX,            instr_handler},
2029     {BWRITERSIO_TEXLDP,         instr_handler},
2030     {BWRITERSIO_TEXLDB,         instr_handler},
2031     {BWRITERSIO_TEXKILL,        instr_handler},
2032
2033     {BWRITERSIO_END,            NULL},
2034 };
2035
2036 static const struct bytecode_backend ps_2_0_backend = {
2037     ps_2_header,
2038     end,
2039     ps_2_srcreg,
2040     ps_2_0_dstreg,
2041     sm_2_opcode,
2042     ps_2_0_handlers
2043 };
2044
2045 static const struct instr_handler_table ps_2_x_handlers[] = {
2046     {BWRITERSIO_ADD,            instr_handler},
2047     {BWRITERSIO_NOP,            instr_handler},
2048     {BWRITERSIO_MOV,            instr_handler},
2049     {BWRITERSIO_SUB,            instr_handler},
2050     {BWRITERSIO_MAD,            instr_handler},
2051     {BWRITERSIO_MUL,            instr_handler},
2052     {BWRITERSIO_RCP,            instr_handler},
2053     {BWRITERSIO_RSQ,            instr_handler},
2054     {BWRITERSIO_DP3,            instr_handler},
2055     {BWRITERSIO_DP4,            instr_handler},
2056     {BWRITERSIO_MIN,            instr_handler},
2057     {BWRITERSIO_MAX,            instr_handler},
2058     {BWRITERSIO_ABS,            instr_handler},
2059     {BWRITERSIO_EXP,            instr_handler},
2060     {BWRITERSIO_LOG,            instr_handler},
2061     {BWRITERSIO_EXPP,           instr_handler},
2062     {BWRITERSIO_LOGP,           instr_handler},
2063     {BWRITERSIO_LRP,            instr_handler},
2064     {BWRITERSIO_FRC,            instr_handler},
2065     {BWRITERSIO_CRS,            instr_handler},
2066     {BWRITERSIO_NRM,            instr_handler},
2067     {BWRITERSIO_SINCOS,         instr_handler},
2068     {BWRITERSIO_M4x4,           instr_handler},
2069     {BWRITERSIO_M4x3,           instr_handler},
2070     {BWRITERSIO_M3x4,           instr_handler},
2071     {BWRITERSIO_M3x3,           instr_handler},
2072     {BWRITERSIO_M3x2,           instr_handler},
2073     {BWRITERSIO_POW,            instr_handler},
2074     {BWRITERSIO_DP2ADD,         instr_handler},
2075     {BWRITERSIO_CMP,            instr_handler},
2076
2077     {BWRITERSIO_CALL,           instr_handler},
2078     {BWRITERSIO_CALLNZ,         instr_handler},
2079     {BWRITERSIO_REP,            instr_handler},
2080     {BWRITERSIO_ENDREP,         instr_handler},
2081     {BWRITERSIO_IF,             instr_handler},
2082     {BWRITERSIO_LABEL,          instr_handler},
2083     {BWRITERSIO_IFC,            instr_handler},
2084     {BWRITERSIO_ELSE,           instr_handler},
2085     {BWRITERSIO_ENDIF,          instr_handler},
2086     {BWRITERSIO_BREAK,          instr_handler},
2087     {BWRITERSIO_BREAKC,         instr_handler},
2088     {BWRITERSIO_RET,            instr_handler},
2089
2090     {BWRITERSIO_TEX,            instr_handler},
2091     {BWRITERSIO_TEXLDP,         instr_handler},
2092     {BWRITERSIO_TEXLDB,         instr_handler},
2093     {BWRITERSIO_TEXKILL,        instr_handler},
2094     {BWRITERSIO_DSX,            instr_handler},
2095     {BWRITERSIO_DSY,            instr_handler},
2096
2097     {BWRITERSIO_SETP,           instr_handler},
2098     {BWRITERSIO_BREAKP,         instr_handler},
2099
2100     {BWRITERSIO_TEXLDD,         instr_handler},
2101
2102     {BWRITERSIO_END,            NULL},
2103 };
2104
2105 static const struct bytecode_backend ps_2_x_backend = {
2106     ps_2_header,
2107     end,
2108     ps_2_srcreg,
2109     ps_2_0_dstreg,
2110     sm_2_opcode,
2111     ps_2_x_handlers
2112 };
2113
2114 static void sm_3_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
2115     /* Declare the shader type and version */
2116     put_dword(buffer, This->version);
2117
2118     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
2119     write_declarations(This, buffer, TRUE, shader->outputs, shader->num_outputs, BWRITERSPR_OUTPUT);
2120     write_constF(shader, buffer, TRUE);
2121     write_constB(shader, buffer, TRUE);
2122     write_constI(shader, buffer, TRUE);
2123     write_samplers(shader, buffer);
2124     return;
2125 }
2126
2127 static void sm_3_srcreg(struct bc_writer *This,
2128                         const struct shader_reg *reg,
2129                         struct bytecode_buffer *buffer) {
2130     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
2131     DWORD d3d9reg;
2132
2133     d3d9reg = d3d9_register(reg->type);
2134     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
2135     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
2136     token |= reg->regnum & D3DSP_REGNUM_MASK;
2137
2138     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK;
2139     token |= d3d9_srcmod(reg->srcmod);
2140
2141     if(reg->rel_reg) {
2142         if(reg->type == BWRITERSPR_CONST && This->version == BWRITERPS_VERSION(3, 0)) {
2143             WARN("c%u[...] is unsupported in ps_3_0\n", reg->regnum);
2144             This->state = E_INVALIDARG;
2145             return;
2146         }
2147         if(((reg->rel_reg->type == BWRITERSPR_ADDR && This->version == BWRITERVS_VERSION(3, 0)) ||
2148            reg->rel_reg->type == BWRITERSPR_LOOP) &&
2149            reg->rel_reg->regnum == 0) {
2150             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
2151         } else {
2152             WARN("Unsupported relative addressing register\n");
2153             This->state = E_INVALIDARG;
2154             return;
2155         }
2156     }
2157
2158     put_dword(buffer, token);
2159
2160     /* vs_2_0 and newer write the register containing the index explicitly in the
2161      * binary code
2162      */
2163     if(token & D3DVS_ADDRMODE_RELATIVE) {
2164         sm_3_srcreg(This, reg->rel_reg, buffer);
2165     }
2166 }
2167
2168 static void sm_3_dstreg(struct bc_writer *This,
2169                         const struct shader_reg *reg,
2170                         struct bytecode_buffer *buffer,
2171                         DWORD shift, DWORD mod) {
2172     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
2173     DWORD d3d9reg;
2174
2175     if(reg->rel_reg) {
2176         if(This->version == BWRITERVS_VERSION(3, 0) &&
2177            reg->type == BWRITERSPR_OUTPUT) {
2178             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
2179         } else {
2180             WARN("Relative addressing not supported for this shader type or register type\n");
2181             This->state = E_INVALIDARG;
2182             return;
2183         }
2184     }
2185
2186     d3d9reg = d3d9_register(reg->type);
2187     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
2188     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
2189     token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
2190
2191     token |= d3d9_dstmod(mod);
2192
2193     token |= d3d9_writemask(reg->u.writemask);
2194     put_dword(buffer, token);
2195
2196     /* vs_2_0 and newer write the register containing the index explicitly in the
2197      * binary code
2198      */
2199     if(token & D3DVS_ADDRMODE_RELATIVE) {
2200         sm_3_srcreg(This, reg->rel_reg, buffer);
2201     }
2202 }
2203
2204 static const struct instr_handler_table vs_3_handlers[] = {
2205     {BWRITERSIO_ADD,            instr_handler},
2206     {BWRITERSIO_NOP,            instr_handler},
2207     {BWRITERSIO_MOV,            instr_handler},
2208     {BWRITERSIO_SUB,            instr_handler},
2209     {BWRITERSIO_MAD,            instr_handler},
2210     {BWRITERSIO_MUL,            instr_handler},
2211     {BWRITERSIO_RCP,            instr_handler},
2212     {BWRITERSIO_RSQ,            instr_handler},
2213     {BWRITERSIO_DP3,            instr_handler},
2214     {BWRITERSIO_DP4,            instr_handler},
2215     {BWRITERSIO_MIN,            instr_handler},
2216     {BWRITERSIO_MAX,            instr_handler},
2217     {BWRITERSIO_SLT,            instr_handler},
2218     {BWRITERSIO_SGE,            instr_handler},
2219     {BWRITERSIO_ABS,            instr_handler},
2220     {BWRITERSIO_EXP,            instr_handler},
2221     {BWRITERSIO_LOG,            instr_handler},
2222     {BWRITERSIO_EXPP,           instr_handler},
2223     {BWRITERSIO_LOGP,           instr_handler},
2224     {BWRITERSIO_DST,            instr_handler},
2225     {BWRITERSIO_LRP,            instr_handler},
2226     {BWRITERSIO_FRC,            instr_handler},
2227     {BWRITERSIO_CRS,            instr_handler},
2228     {BWRITERSIO_SGN,            instr_handler},
2229     {BWRITERSIO_NRM,            instr_handler},
2230     {BWRITERSIO_SINCOS,         instr_handler},
2231     {BWRITERSIO_M4x4,           instr_handler},
2232     {BWRITERSIO_M4x3,           instr_handler},
2233     {BWRITERSIO_M3x4,           instr_handler},
2234     {BWRITERSIO_M3x3,           instr_handler},
2235     {BWRITERSIO_M3x2,           instr_handler},
2236     {BWRITERSIO_LIT,            instr_handler},
2237     {BWRITERSIO_POW,            instr_handler},
2238     {BWRITERSIO_MOVA,           instr_handler},
2239
2240     {BWRITERSIO_CALL,           instr_handler},
2241     {BWRITERSIO_CALLNZ,         instr_handler},
2242     {BWRITERSIO_REP,            instr_handler},
2243     {BWRITERSIO_ENDREP,         instr_handler},
2244     {BWRITERSIO_IF,             instr_handler},
2245     {BWRITERSIO_LABEL,          instr_handler},
2246     {BWRITERSIO_IFC,            instr_handler},
2247     {BWRITERSIO_ELSE,           instr_handler},
2248     {BWRITERSIO_ENDIF,          instr_handler},
2249     {BWRITERSIO_BREAK,          instr_handler},
2250     {BWRITERSIO_BREAKC,         instr_handler},
2251     {BWRITERSIO_LOOP,           instr_handler},
2252     {BWRITERSIO_RET,            instr_handler},
2253     {BWRITERSIO_ENDLOOP,        instr_handler},
2254
2255     {BWRITERSIO_SETP,           instr_handler},
2256     {BWRITERSIO_BREAKP,         instr_handler},
2257     {BWRITERSIO_TEXLDL,         instr_handler},
2258
2259     {BWRITERSIO_END,            NULL},
2260 };
2261
2262 static const struct bytecode_backend vs_3_backend = {
2263     sm_3_header,
2264     end,
2265     sm_3_srcreg,
2266     sm_3_dstreg,
2267     sm_2_opcode,
2268     vs_3_handlers
2269 };
2270
2271 static const struct instr_handler_table ps_3_handlers[] = {
2272     {BWRITERSIO_ADD,            instr_handler},
2273     {BWRITERSIO_NOP,            instr_handler},
2274     {BWRITERSIO_MOV,            instr_handler},
2275     {BWRITERSIO_SUB,            instr_handler},
2276     {BWRITERSIO_MAD,            instr_handler},
2277     {BWRITERSIO_MUL,            instr_handler},
2278     {BWRITERSIO_RCP,            instr_handler},
2279     {BWRITERSIO_RSQ,            instr_handler},
2280     {BWRITERSIO_DP3,            instr_handler},
2281     {BWRITERSIO_DP4,            instr_handler},
2282     {BWRITERSIO_MIN,            instr_handler},
2283     {BWRITERSIO_MAX,            instr_handler},
2284     {BWRITERSIO_ABS,            instr_handler},
2285     {BWRITERSIO_EXP,            instr_handler},
2286     {BWRITERSIO_LOG,            instr_handler},
2287     {BWRITERSIO_EXPP,           instr_handler},
2288     {BWRITERSIO_LOGP,           instr_handler},
2289     {BWRITERSIO_LRP,            instr_handler},
2290     {BWRITERSIO_FRC,            instr_handler},
2291     {BWRITERSIO_CRS,            instr_handler},
2292     {BWRITERSIO_NRM,            instr_handler},
2293     {BWRITERSIO_SINCOS,         instr_handler},
2294     {BWRITERSIO_M4x4,           instr_handler},
2295     {BWRITERSIO_M4x3,           instr_handler},
2296     {BWRITERSIO_M3x4,           instr_handler},
2297     {BWRITERSIO_M3x3,           instr_handler},
2298     {BWRITERSIO_M3x2,           instr_handler},
2299     {BWRITERSIO_POW,            instr_handler},
2300     {BWRITERSIO_DP2ADD,         instr_handler},
2301     {BWRITERSIO_CMP,            instr_handler},
2302
2303     {BWRITERSIO_CALL,           instr_handler},
2304     {BWRITERSIO_CALLNZ,         instr_handler},
2305     {BWRITERSIO_REP,            instr_handler},
2306     {BWRITERSIO_ENDREP,         instr_handler},
2307     {BWRITERSIO_IF,             instr_handler},
2308     {BWRITERSIO_LABEL,          instr_handler},
2309     {BWRITERSIO_IFC,            instr_handler},
2310     {BWRITERSIO_ELSE,           instr_handler},
2311     {BWRITERSIO_ENDIF,          instr_handler},
2312     {BWRITERSIO_BREAK,          instr_handler},
2313     {BWRITERSIO_BREAKC,         instr_handler},
2314     {BWRITERSIO_LOOP,           instr_handler},
2315     {BWRITERSIO_RET,            instr_handler},
2316     {BWRITERSIO_ENDLOOP,        instr_handler},
2317
2318     {BWRITERSIO_SETP,           instr_handler},
2319     {BWRITERSIO_BREAKP,         instr_handler},
2320     {BWRITERSIO_TEXLDL,         instr_handler},
2321
2322     {BWRITERSIO_TEX,            instr_handler},
2323     {BWRITERSIO_TEXLDP,         instr_handler},
2324     {BWRITERSIO_TEXLDB,         instr_handler},
2325     {BWRITERSIO_TEXKILL,        instr_handler},
2326     {BWRITERSIO_DSX,            instr_handler},
2327     {BWRITERSIO_DSY,            instr_handler},
2328     {BWRITERSIO_TEXLDD,         instr_handler},
2329
2330     {BWRITERSIO_END,            NULL},
2331 };
2332
2333 static const struct bytecode_backend ps_3_backend = {
2334     sm_3_header,
2335     end,
2336     sm_3_srcreg,
2337     sm_3_dstreg,
2338     sm_2_opcode,
2339     ps_3_handlers
2340 };
2341
2342 static void init_vs10_dx9_writer(struct bc_writer *writer) {
2343     TRACE("Creating DirectX9 vertex shader 1.0 writer\n");
2344     writer->funcs = &vs_1_x_backend;
2345 }
2346
2347 static void init_vs11_dx9_writer(struct bc_writer *writer) {
2348     TRACE("Creating DirectX9 vertex shader 1.1 writer\n");
2349     writer->funcs = &vs_1_x_backend;
2350 }
2351
2352 static void init_vs20_dx9_writer(struct bc_writer *writer) {
2353     TRACE("Creating DirectX9 vertex shader 2.0 writer\n");
2354     writer->funcs = &vs_2_0_backend;
2355 }
2356
2357 static void init_vs2x_dx9_writer(struct bc_writer *writer) {
2358     TRACE("Creating DirectX9 vertex shader 2.x writer\n");
2359     writer->funcs = &vs_2_x_backend;
2360 }
2361
2362 static void init_vs30_dx9_writer(struct bc_writer *writer) {
2363     TRACE("Creating DirectX9 vertex shader 3.0 writer\n");
2364     writer->funcs = &vs_3_backend;
2365 }
2366
2367 static void init_ps10_dx9_writer(struct bc_writer *writer) {
2368     TRACE("Creating DirectX9 pixel shader 1.0 writer\n");
2369     writer->funcs = &ps_1_0123_backend;
2370 }
2371
2372 static void init_ps11_dx9_writer(struct bc_writer *writer) {
2373     TRACE("Creating DirectX9 pixel shader 1.1 writer\n");
2374     writer->funcs = &ps_1_0123_backend;
2375 }
2376
2377 static void init_ps12_dx9_writer(struct bc_writer *writer) {
2378     TRACE("Creating DirectX9 pixel shader 1.2 writer\n");
2379     writer->funcs = &ps_1_0123_backend;
2380 }
2381
2382 static void init_ps13_dx9_writer(struct bc_writer *writer) {
2383     TRACE("Creating DirectX9 pixel shader 1.3 writer\n");
2384     writer->funcs = &ps_1_0123_backend;
2385 }
2386
2387 static void init_ps14_dx9_writer(struct bc_writer *writer) {
2388     TRACE("Creating DirectX9 pixel shader 1.4 writer\n");
2389     writer->funcs = &ps_1_4_backend;
2390 }
2391
2392 static void init_ps20_dx9_writer(struct bc_writer *writer) {
2393     TRACE("Creating DirectX9 pixel shader 2.0 writer\n");
2394     writer->funcs = &ps_2_0_backend;
2395 }
2396
2397 static void init_ps2x_dx9_writer(struct bc_writer *writer) {
2398     TRACE("Creating DirectX9 pixel shader 2.x writer\n");
2399     writer->funcs = &ps_2_x_backend;
2400 }
2401
2402 static void init_ps30_dx9_writer(struct bc_writer *writer) {
2403     TRACE("Creating DirectX9 pixel shader 3.0 writer\n");
2404     writer->funcs = &ps_3_backend;
2405 }
2406
2407 static struct bc_writer *create_writer(DWORD version, DWORD dxversion) {
2408     struct bc_writer *ret = asm_alloc(sizeof(*ret));
2409
2410     if(!ret) {
2411         WARN("Failed to allocate a bytecode writer instance\n");
2412         return NULL;
2413     }
2414
2415     switch(version) {
2416         case BWRITERVS_VERSION(1, 0):
2417             if(dxversion != 9) {
2418                 WARN("Unsupported dxversion for vertex shader 1.0 requested: %u\n", dxversion);
2419                 goto fail;
2420             }
2421             init_vs10_dx9_writer(ret);
2422             break;
2423         case BWRITERVS_VERSION(1, 1):
2424             if(dxversion != 9) {
2425                 WARN("Unsupported dxversion for vertex shader 1.1 requested: %u\n", dxversion);
2426                 goto fail;
2427             }
2428             init_vs11_dx9_writer(ret);
2429             break;
2430         case BWRITERVS_VERSION(2, 0):
2431             if(dxversion != 9) {
2432                 WARN("Unsupported dxversion for vertex shader 2.0 requested: %u\n", dxversion);
2433                 goto fail;
2434             }
2435             init_vs20_dx9_writer(ret);
2436             break;
2437         case BWRITERVS_VERSION(2, 1):
2438             if(dxversion != 9) {
2439                 WARN("Unsupported dxversion for vertex shader 2.x requested: %u\n", dxversion);
2440                 goto fail;
2441             }
2442             init_vs2x_dx9_writer(ret);
2443             break;
2444         case BWRITERVS_VERSION(3, 0):
2445             if(dxversion != 9) {
2446                 WARN("Unsupported dxversion for vertex shader 3.0 requested: %u\n", dxversion);
2447                 goto fail;
2448             }
2449             init_vs30_dx9_writer(ret);
2450             break;
2451
2452         case BWRITERPS_VERSION(1, 0):
2453             if(dxversion != 9) {
2454                 WARN("Unsupported dxversion for pixel shader 1.0 requested: %u\n", dxversion);
2455                 goto fail;
2456             }
2457             init_ps10_dx9_writer(ret);
2458             break;
2459         case BWRITERPS_VERSION(1, 1):
2460             if(dxversion != 9) {
2461                 WARN("Unsupported dxversion for pixel shader 1.1 requested: %u\n", dxversion);
2462                 goto fail;
2463             }
2464             init_ps11_dx9_writer(ret);
2465             break;
2466         case BWRITERPS_VERSION(1, 2):
2467             if(dxversion != 9) {
2468                 WARN("Unsupported dxversion for pixel shader 1.2 requested: %u\n", dxversion);
2469                 goto fail;
2470             }
2471             init_ps12_dx9_writer(ret);
2472             break;
2473         case BWRITERPS_VERSION(1, 3):
2474             if(dxversion != 9) {
2475                 WARN("Unsupported dxversion for pixel shader 1.3 requested: %u\n", dxversion);
2476                 goto fail;
2477             }
2478             init_ps13_dx9_writer(ret);
2479             break;
2480         case BWRITERPS_VERSION(1, 4):
2481             if(dxversion != 9) {
2482                 WARN("Unsupported dxversion for pixel shader 1.4 requested: %u\n", dxversion);
2483                 goto fail;
2484             }
2485             init_ps14_dx9_writer(ret);
2486             break;
2487
2488         case BWRITERPS_VERSION(2, 0):
2489             if(dxversion != 9) {
2490                 WARN("Unsupported dxversion for pixel shader 2.0 requested: %u\n", dxversion);
2491                 goto fail;
2492             }
2493             init_ps20_dx9_writer(ret);
2494             break;
2495
2496         case BWRITERPS_VERSION(2, 1):
2497             if(dxversion != 9) {
2498                 WARN("Unsupported dxversion for pixel shader 2.x requested: %u\n", dxversion);
2499                 goto fail;
2500             }
2501             init_ps2x_dx9_writer(ret);
2502             break;
2503
2504         case BWRITERPS_VERSION(3, 0):
2505             if(dxversion != 9) {
2506                 WARN("Unsupported dxversion for pixel shader 3.0 requested: %u\n", dxversion);
2507                 goto fail;
2508             }
2509             init_ps30_dx9_writer(ret);
2510             break;
2511
2512         default:
2513             WARN("Unexpected shader version requested: %08x\n", version);
2514             goto fail;
2515     }
2516     ret->version = version;
2517     return ret;
2518
2519 fail:
2520     asm_free(ret);
2521     return NULL;
2522 }
2523
2524 static HRESULT call_instr_handler(struct bc_writer *writer,
2525                                   const struct instruction *instr,
2526                                   struct bytecode_buffer *buffer) {
2527     DWORD i=0;
2528
2529     while(writer->funcs->instructions[i].opcode != BWRITERSIO_END) {
2530         if(instr->opcode == writer->funcs->instructions[i].opcode) {
2531             if(!writer->funcs->instructions[i].func) {
2532                 WARN("Opcode %u not supported by this profile\n", instr->opcode);
2533                 return E_INVALIDARG;
2534             }
2535             writer->funcs->instructions[i].func(writer, instr, buffer);
2536             return S_OK;
2537         }
2538         i++;
2539     }
2540
2541     FIXME("Unhandled instruction %u - %s\n", instr->opcode,
2542           debug_print_opcode(instr->opcode));
2543     return E_INVALIDARG;
2544 }
2545
2546 /* SlWriteBytecode (wineshader.@)
2547  *
2548  * Writes shader version specific bytecode from the shader passed in.
2549  * The returned bytecode can be passed to the Direct3D runtime like
2550  * IDirect3DDevice9::Create*Shader.
2551  *
2552  * Parameters:
2553  *  shader: Shader to translate into bytecode
2554  *  version: Shader version to generate(d3d version token)
2555  *  dxversion: DirectX version the code targets
2556  *  result: the resulting shader bytecode
2557  *
2558  * Return values:
2559  *  S_OK on success
2560  */
2561 DWORD SlWriteBytecode(const struct bwriter_shader *shader, int dxversion, DWORD **result) {
2562     struct bc_writer *writer;
2563     struct bytecode_buffer *buffer = NULL;
2564     HRESULT hr;
2565     unsigned int i;
2566
2567     if(!shader){
2568         ERR("NULL shader structure, aborting\n");
2569         return E_FAIL;
2570     }
2571     writer = create_writer(shader->version, dxversion);
2572     *result = NULL;
2573
2574     if(!writer) {
2575         WARN("Could not create a bytecode writer instance. Either unsupported version\n");
2576         WARN("or out of memory\n");
2577         hr = E_FAIL;
2578         goto error;
2579     }
2580
2581     buffer = allocate_buffer();
2582     if(!buffer) {
2583         WARN("Failed to allocate a buffer for the shader bytecode\n");
2584         hr = E_FAIL;
2585         goto error;
2586     }
2587
2588     writer->funcs->header(writer, shader, buffer);
2589     if(FAILED(writer->state)) {
2590         hr = writer->state;
2591         goto error;
2592     }
2593
2594     for(i = 0; i < shader->num_instrs; i++) {
2595         hr = call_instr_handler(writer, shader->instr[i], buffer);
2596         if(FAILED(hr)) {
2597             goto error;
2598         }
2599     }
2600
2601     if(FAILED(writer->state)) {
2602         hr = writer->state;
2603         goto error;
2604     }
2605
2606     writer->funcs->end(writer, shader, buffer);
2607
2608     if(FAILED(buffer->state)) {
2609         hr = buffer->state;
2610         goto error;
2611     }
2612
2613     /* Cut off unneeded memory from the result buffer */
2614     *result = asm_realloc(buffer->data,
2615                          sizeof(DWORD) * buffer->size);
2616     if(!*result) {
2617         *result = buffer->data;
2618     }
2619     buffer->data = NULL;
2620     hr = S_OK;
2621
2622 error:
2623     if(buffer) {
2624         asm_free(buffer->data);
2625         asm_free(buffer);
2626     }
2627     asm_free(writer);
2628     return hr;
2629 }
2630
2631 void SlDeleteShader(struct bwriter_shader *shader) {
2632     unsigned int i, j;
2633
2634     TRACE("Deleting shader %p\n", shader);
2635
2636     for(i = 0; i < shader->num_cf; i++) {
2637         asm_free(shader->constF[i]);
2638     }
2639     asm_free(shader->constF);
2640     for(i = 0; i < shader->num_ci; i++) {
2641         asm_free(shader->constI[i]);
2642     }
2643     asm_free(shader->constI);
2644     for(i = 0; i < shader->num_cb; i++) {
2645         asm_free(shader->constB[i]);
2646     }
2647     asm_free(shader->constB);
2648
2649     asm_free(shader->inputs);
2650     asm_free(shader->outputs);
2651     asm_free(shader->samplers);
2652
2653     for(i = 0; i < shader->num_instrs; i++) {
2654         for(j = 0; j < shader->instr[i]->num_srcs; j++) {
2655             asm_free(shader->instr[i]->src[j].rel_reg);
2656         }
2657         asm_free(shader->instr[i]->src);
2658         asm_free(shader->instr[i]);
2659     }
2660     asm_free(shader->instr);
2661
2662     asm_free(shader);
2663 }