windowscodecs: Handle TIFF's with RowsPerStrip greater than Height.
[wine] / dlls / d3dx9_36 / bytecodewriter.c
1 /*
2  * Direct3D bytecode output functions
3  *
4  * Copyright 2008 Stefan Dösinger
5  * Copyright 2009 Matteo Bruni
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20  *
21  */
22
23 #include "config.h"
24 #include "wine/port.h"
25 #include "wine/debug.h"
26
27 #include "d3dx9_36_private.h"
28
29 WINE_DEFAULT_DEBUG_CHANNEL(asmshader);
30
31 /****************************************************************
32  * General assembler shader construction helper routines follow *
33  ****************************************************************/
34 /* struct instruction *alloc_instr
35  *
36  * Allocates a new instruction structure with srcs registers
37  *
38  * Parameters:
39  *  srcs: Number of source registers to allocate
40  *
41  * Returns:
42  *  A pointer to the allocated instruction structure
43  *  NULL in case of an allocation failure
44  */
45 struct instruction *alloc_instr(unsigned int srcs) {
46     struct instruction *ret = asm_alloc(sizeof(*ret));
47     if(!ret) {
48         ERR("Failed to allocate memory for an instruction structure\n");
49         return NULL;
50     }
51
52     if(srcs) {
53         ret->src = asm_alloc(srcs * sizeof(*ret->src));
54         if(!ret->src) {
55             ERR("Failed to allocate memory for instruction registers\n");
56             asm_free(ret);
57             return NULL;
58         }
59         ret->num_srcs = srcs;
60     }
61     return ret;
62 }
63
64 /* void add_instruction
65  *
66  * Adds a new instruction to the shader's instructions array and grows the instruction array
67  * if needed.
68  *
69  * The function does NOT copy the instruction structure. Make sure not to release the
70  * instruction or any of its substructures like registers.
71  *
72  * Parameters:
73  *  shader: Shader to add the instruction to
74  *  instr: Instruction to add to the shader
75  */
76 BOOL add_instruction(struct bwriter_shader *shader, struct instruction *instr) {
77     struct instruction      **new_instructions;
78
79     if(!shader) return FALSE;
80
81     if(shader->instr_alloc_size == 0) {
82         shader->instr = asm_alloc(sizeof(*shader->instr) * INSTRARRAY_INITIAL_SIZE);
83         if(!shader->instr) {
84             ERR("Failed to allocate the shader instruction array\n");
85             return FALSE;
86         }
87         shader->instr_alloc_size = INSTRARRAY_INITIAL_SIZE;
88     } else if(shader->instr_alloc_size == shader->num_instrs) {
89         new_instructions = asm_realloc(shader->instr,
90                                        sizeof(*shader->instr) * (shader->instr_alloc_size) * 2);
91         if(!new_instructions) {
92             ERR("Failed to grow the shader instruction array\n");
93             return FALSE;
94         }
95         shader->instr = new_instructions;
96         shader->instr_alloc_size = shader->instr_alloc_size * 2;
97     } else if(shader->num_instrs > shader->instr_alloc_size) {
98         ERR("More instructions than allocated. This should not happen\n");
99         return FALSE;
100     }
101
102     shader->instr[shader->num_instrs] = instr;
103     shader->num_instrs++;
104     return TRUE;
105 }
106
107 BOOL add_constF(struct bwriter_shader *shader, DWORD reg, float x, float y, float z, float w) {
108     struct constant *newconst;
109
110     if(shader->num_cf) {
111         struct constant **newarray;
112         newarray = asm_realloc(shader->constF,
113                                sizeof(*shader->constF) * (shader->num_cf + 1));
114         if(!newarray) {
115             ERR("Failed to grow the constants array\n");
116             return FALSE;
117         }
118         shader->constF = newarray;
119     } else {
120         shader->constF = asm_alloc(sizeof(*shader->constF));
121         if(!shader->constF) {
122             ERR("Failed to allocate the constants array\n");
123             return FALSE;
124         }
125     }
126
127     newconst = asm_alloc(sizeof(*newconst));
128     if(!newconst) {
129         ERR("Failed to allocate a new constant\n");
130         return FALSE;
131     }
132     newconst->regnum = reg;
133     newconst->value[0].f = x;
134     newconst->value[1].f = y;
135     newconst->value[2].f = z;
136     newconst->value[3].f = w;
137     shader->constF[shader->num_cf] = newconst;
138
139     shader->num_cf++;
140     return TRUE;
141 }
142
143 BOOL add_constI(struct bwriter_shader *shader, DWORD reg, INT x, INT y, INT z, INT w) {
144     struct constant *newconst;
145
146     if(shader->num_ci) {
147         struct constant **newarray;
148         newarray = asm_realloc(shader->constI,
149                                sizeof(*shader->constI) * (shader->num_ci + 1));
150         if(!newarray) {
151             ERR("Failed to grow the constants array\n");
152             return FALSE;
153         }
154         shader->constI = newarray;
155     } else {
156         shader->constI = asm_alloc(sizeof(*shader->constI));
157         if(!shader->constI) {
158             ERR("Failed to allocate the constants array\n");
159             return FALSE;
160         }
161     }
162
163     newconst = asm_alloc(sizeof(*newconst));
164     if(!newconst) {
165         ERR("Failed to allocate a new constant\n");
166         return FALSE;
167     }
168     newconst->regnum = reg;
169     newconst->value[0].i = x;
170     newconst->value[1].i = y;
171     newconst->value[2].i = z;
172     newconst->value[3].i = w;
173     shader->constI[shader->num_ci] = newconst;
174
175     shader->num_ci++;
176     return TRUE;
177 }
178
179 BOOL add_constB(struct bwriter_shader *shader, DWORD reg, BOOL x) {
180     struct constant *newconst;
181
182     if(shader->num_cb) {
183         struct constant **newarray;
184         newarray = asm_realloc(shader->constB,
185                                sizeof(*shader->constB) * (shader->num_cb + 1));
186         if(!newarray) {
187             ERR("Failed to grow the constants array\n");
188             return FALSE;
189         }
190         shader->constB = newarray;
191     } else {
192         shader->constB = asm_alloc(sizeof(*shader->constB));
193         if(!shader->constB) {
194             ERR("Failed to allocate the constants array\n");
195             return FALSE;
196         }
197     }
198
199     newconst = asm_alloc(sizeof(*newconst));
200     if(!newconst) {
201         ERR("Failed to allocate a new constant\n");
202         return FALSE;
203     }
204     newconst->regnum = reg;
205     newconst->value[0].b = x;
206     shader->constB[shader->num_cb] = newconst;
207
208     shader->num_cb++;
209     return TRUE;
210 }
211
212 BOOL record_declaration(struct bwriter_shader *shader, DWORD usage,
213                         DWORD usage_idx, DWORD mod, BOOL output,
214                         DWORD regnum, DWORD writemask, BOOL builtin) {
215     unsigned int *num;
216     struct declaration **decl;
217     unsigned int i;
218
219     if(!shader) return FALSE;
220
221     if(output) {
222         num = &shader->num_outputs;
223         decl = &shader->outputs;
224     } else {
225         num = &shader->num_inputs;
226         decl = &shader->inputs;
227     }
228
229     if(*num == 0) {
230         *decl = asm_alloc(sizeof(**decl));
231         if(!*decl) {
232             ERR("Error allocating declarations array\n");
233             return FALSE;
234         }
235     } else {
236         struct declaration *newdecl;
237         for(i = 0; i < *num; i++) {
238             if((*decl)[i].regnum == regnum && ((*decl)[i].writemask & writemask)) {
239                 WARN("Declaration of register %u already exists, writemask match 0x%x\n",
240                       regnum, (*decl)[i].writemask & writemask);
241             }
242         }
243
244         newdecl = asm_realloc(*decl,
245                               sizeof(**decl) * ((*num) + 1));
246         if(!newdecl) {
247             ERR("Error reallocating declarations array\n");
248             return FALSE;
249         }
250         *decl = newdecl;
251     }
252     (*decl)[*num].usage = usage;
253     (*decl)[*num].usage_idx = usage_idx;
254     (*decl)[*num].regnum = regnum;
255     (*decl)[*num].mod = mod;
256     (*decl)[*num].writemask = writemask;
257     (*decl)[*num].builtin = builtin;
258     (*num)++;
259
260     return TRUE;
261 }
262
263 BOOL record_sampler(struct bwriter_shader *shader, DWORD samptype, DWORD mod, DWORD regnum) {
264     unsigned int i;
265
266     if(!shader) return FALSE;
267
268     if(shader->num_samplers == 0) {
269         shader->samplers = asm_alloc(sizeof(*shader->samplers));
270         if(!shader->samplers) {
271             ERR("Error allocating samplers array\n");
272             return FALSE;
273         }
274     } else {
275         struct samplerdecl *newarray;
276
277         for(i = 0; i < shader->num_samplers; i++) {
278             if(shader->samplers[i].regnum == regnum) {
279                 WARN("Sampler %u already declared\n", regnum);
280                 /* This is not an error as far as the assembler is concerned.
281                  * Direct3D might refuse to load the compiled shader though
282                  */
283             }
284         }
285
286         newarray = asm_realloc(shader->samplers,
287                                sizeof(*shader->samplers) * (shader->num_samplers + 1));
288         if(!newarray) {
289             ERR("Error reallocating samplers array\n");
290             return FALSE;
291         }
292         shader->samplers = newarray;
293     }
294
295     shader->samplers[shader->num_samplers].type = samptype;
296     shader->samplers[shader->num_samplers].mod = mod;
297     shader->samplers[shader->num_samplers].regnum = regnum;
298     shader->num_samplers++;
299     return TRUE;
300 }
301
302
303 /* shader bytecode buffer manipulation functions.
304  * allocate_buffer creates a new buffer structure, put_dword adds a new
305  * DWORD to the buffer. In the rare case of a memory allocation failure
306  * when trying to grow the buffer a flag is set in the buffer to mark it
307  * invalid. This avoids return value checking and passing in many places
308  */
309 static struct bytecode_buffer *allocate_buffer(void) {
310     struct bytecode_buffer *ret;
311
312     ret = asm_alloc(sizeof(*ret));
313     if(!ret) return NULL;
314
315     ret->alloc_size = BYTECODEBUFFER_INITIAL_SIZE;
316     ret->data = asm_alloc(sizeof(DWORD) * ret->alloc_size);
317     if(!ret->data) {
318         asm_free(ret);
319         return NULL;
320     }
321     ret->state = S_OK;
322     return ret;
323 }
324
325 static void put_dword(struct bytecode_buffer *buffer, DWORD value) {
326     if(FAILED(buffer->state)) return;
327
328     if(buffer->alloc_size == buffer->size) {
329         DWORD *newarray;
330         buffer->alloc_size *= 2;
331         newarray = asm_realloc(buffer->data,
332                                sizeof(DWORD) * buffer->alloc_size);
333         if(!newarray) {
334             ERR("Failed to grow the buffer data memory\n");
335             buffer->state = E_OUTOFMEMORY;
336             return;
337         }
338         buffer->data = newarray;
339     }
340     buffer->data[buffer->size++] = value;
341 }
342
343 /******************************************************
344  * Implementation of the writer functions starts here *
345  ******************************************************/
346 static void write_declarations(struct bc_writer *This,
347                                struct bytecode_buffer *buffer, BOOL len,
348                                const struct declaration *decls, unsigned int num, DWORD type) {
349     DWORD i;
350     DWORD instr_dcl = D3DSIO_DCL;
351     DWORD token;
352     struct shader_reg reg;
353
354     ZeroMemory(&reg, sizeof(reg));
355
356     if(len) {
357         instr_dcl |= 2 << D3DSI_INSTLENGTH_SHIFT;
358     }
359
360     for(i = 0; i < num; i++) {
361         if(decls[i].builtin) continue;
362
363         /* Write the DCL instruction */
364         put_dword(buffer, instr_dcl);
365
366         /* Write the usage and index */
367         token = (1 << 31); /* Bit 31 of non-instruction opcodes is 1 */
368         token |= (decls[i].usage << D3DSP_DCL_USAGE_SHIFT) & D3DSP_DCL_USAGE_MASK;
369         token |= (decls[i].usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT) & D3DSP_DCL_USAGEINDEX_MASK;
370         put_dword(buffer, token);
371
372         /* Write the dest register */
373         reg.type = type;
374         reg.regnum = decls[i].regnum;
375         reg.writemask = decls[i].writemask;
376         This->funcs->dstreg(This, &reg, buffer, 0, decls[i].mod);
377     }
378 }
379
380 static void write_const(struct constant **consts, int num, DWORD opcode, DWORD reg_type, struct bytecode_buffer *buffer, BOOL len) {
381     DWORD i;
382     DWORD instr_def = opcode;
383     const DWORD reg = (1<<31) |
384                       ((reg_type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
385                       ((reg_type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
386                       D3DSP_WRITEMASK_ALL;
387
388     if(len) {
389         if(opcode == D3DSIO_DEFB)
390             instr_def |= 2 << D3DSI_INSTLENGTH_SHIFT;
391         else
392             instr_def |= 5 << D3DSI_INSTLENGTH_SHIFT;
393     }
394
395     for(i = 0; i < num; i++) {
396         /* Write the DEF instruction */
397         put_dword(buffer, instr_def);
398
399         put_dword(buffer, reg | (consts[i]->regnum & D3DSP_REGNUM_MASK));
400         put_dword(buffer, consts[i]->value[0].d);
401         if(opcode != D3DSIO_DEFB) {
402             put_dword(buffer, consts[i]->value[1].d);
403             put_dword(buffer, consts[i]->value[2].d);
404             put_dword(buffer, consts[i]->value[3].d);
405         }
406     }
407 }
408
409 static void write_constF(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
410     write_const(shader->constF, shader->num_cf, D3DSIO_DEF, D3DSPR_CONST, buffer, len);
411 }
412
413 /* This function looks for VS 1/2 registers mapping to VS 3 output registers */
414 static HRESULT vs_find_builtin_varyings(struct bc_writer *This, const struct bwriter_shader *shader) {
415     DWORD i;
416     DWORD usage, usage_idx, writemask, regnum;
417
418     for(i = 0; i < shader->num_outputs; i++) {
419         if(!shader->outputs[i].builtin) continue;
420
421         usage = shader->outputs[i].usage;
422         usage_idx = shader->outputs[i].usage_idx;
423         writemask = shader->outputs[i].writemask;
424         regnum = shader->outputs[i].regnum;
425
426         switch(usage) {
427             case BWRITERDECLUSAGE_POSITION:
428             case BWRITERDECLUSAGE_POSITIONT:
429                 if(usage_idx > 0) {
430                     WARN("dcl_position%u not supported in sm 1/2 shaders\n", usage_idx);
431                     return E_INVALIDARG;
432                 }
433                 TRACE("o%u is oPos\n", regnum);
434                 This->oPos_regnum = regnum;
435                 break;
436
437             case BWRITERDECLUSAGE_COLOR:
438                 if(usage_idx > 1) {
439                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
440                     return E_INVALIDARG;
441                 }
442                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
443                     WARN("Only WRITEMASK_ALL is supported on color in sm 1/2\n");
444                     return E_INVALIDARG;
445                 }
446                 TRACE("o%u is oD%u\n", regnum, usage_idx);
447                 This->oD_regnum[usage_idx] = regnum;
448                 break;
449
450             case BWRITERDECLUSAGE_TEXCOORD:
451                 if(usage_idx >= 8) {
452                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
453                     return E_INVALIDARG;
454                 }
455                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
456                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
457                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
458                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
459                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
460                     return E_INVALIDARG;
461                 }
462                 TRACE("o%u is oT%u\n", regnum, usage_idx);
463                 This->oT_regnum[usage_idx] = regnum;
464                 break;
465
466             case BWRITERDECLUSAGE_PSIZE:
467                 if(usage_idx > 0) {
468                     WARN("dcl_psize%u not supported in sm 1/2 shaders\n", usage_idx);
469                     return E_INVALIDARG;
470                 }
471                 TRACE("o%u writemask 0x%08x is oPts\n", regnum, writemask);
472                 This->oPts_regnum = regnum;
473                 This->oPts_mask = writemask;
474                 break;
475
476             case BWRITERDECLUSAGE_FOG:
477                 if(usage_idx > 0) {
478                     WARN("dcl_fog%u not supported in sm 1 shaders\n", usage_idx);
479                     return E_INVALIDARG;
480                 }
481                 if(writemask != BWRITERSP_WRITEMASK_0 && writemask != BWRITERSP_WRITEMASK_1 &&
482                    writemask != BWRITERSP_WRITEMASK_2 && writemask != BWRITERSP_WRITEMASK_3) {
483                     WARN("Unsupported fog writemask\n");
484                     return E_INVALIDARG;
485                 }
486                 TRACE("o%u writemask 0x%08x is oFog\n", regnum, writemask);
487                 This->oFog_regnum = regnum;
488                 This->oFog_mask = writemask;
489                 break;
490
491             default:
492                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
493                 return E_INVALIDARG;
494         }
495     }
496
497     return S_OK;
498 }
499
500 static void vs_1_x_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
501     HRESULT hr;
502
503     if(shader->num_ci || shader->num_cb) {
504         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
505         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
506         This->state = E_INVALIDARG;
507         return;
508     }
509
510     hr = vs_find_builtin_varyings(This, shader);
511     if(FAILED(hr)) {
512         This->state = hr;
513         return;
514     }
515
516     /* Declare the shader type and version */
517     put_dword(buffer, This->version);
518
519     write_declarations(This, buffer, FALSE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
520     write_constF(shader, buffer, FALSE);
521     return;
522 }
523
524 static HRESULT find_ps_builtin_semantics(struct bc_writer *This,
525                                          const struct bwriter_shader *shader,
526                                          DWORD texcoords) {
527     DWORD i;
528     DWORD usage, usage_idx, writemask, regnum;
529
530     This->v_regnum[0] = -1; This->v_regnum[1] = -1;
531     for(i = 0; i < 8; i++) This->t_regnum[i] = -1;
532
533     for(i = 0; i < shader->num_inputs; i++) {
534         if(!shader->inputs[i].builtin) continue;
535
536         usage = shader->inputs[i].usage;
537         usage_idx = shader->inputs[i].usage_idx;
538         writemask = shader->inputs[i].writemask;
539         regnum = shader->inputs[i].regnum;
540
541         switch(usage) {
542             case BWRITERDECLUSAGE_COLOR:
543                 if(usage_idx > 1) {
544                     WARN("dcl_color%u not supported in sm 1 shaders\n", usage_idx);
545                     return E_INVALIDARG;
546                 }
547                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
548                     WARN("Only WRITEMASK_ALL is supported on color in sm 1\n");
549                     return E_INVALIDARG;
550                 }
551                 TRACE("v%u is v%u\n", regnum, usage_idx);
552                 This->v_regnum[usage_idx] = regnum;
553                 break;
554
555             case BWRITERDECLUSAGE_TEXCOORD:
556                 if(usage_idx > texcoords) {
557                     WARN("dcl_texcoord%u not supported in this shader version\n", usage_idx);
558                     return E_INVALIDARG;
559                 }
560                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
561                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
562                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
563                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
564                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
565                 } else {
566                     writemask = BWRITERSP_WRITEMASK_ALL;
567                 }
568                 TRACE("v%u is t%u\n", regnum, usage_idx);
569                 This->t_regnum[usage_idx] = regnum;
570                 break;
571
572             default:
573                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
574                 return E_INVALIDARG;
575         }
576     }
577
578     return S_OK;
579 }
580
581 static void ps_1_x_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
582     HRESULT hr;
583
584     /* First check the constants and varyings, and complain if unsupported things are used */
585     if(shader->num_ci || shader->num_cb) {
586         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
587         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
588         This->state = E_INVALIDARG;
589         return;
590     }
591
592     hr = find_ps_builtin_semantics(This, shader, 4);
593     if(FAILED(hr)) {
594         This->state = hr;
595         return;
596     }
597
598     /* Declare the shader type and version */
599     put_dword(buffer, This->version);
600     write_constF(shader, buffer, TRUE);
601 }
602
603 static void ps_1_4_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
604     HRESULT hr;
605
606     /* First check the constants and varyings, and complain if unsupported things are used */
607     if(shader->num_ci || shader->num_cb) {
608         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
609         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
610         This->state = E_INVALIDARG;
611         return;
612     }
613     hr = find_ps_builtin_semantics(This, shader, 6);
614     if(FAILED(hr)) {
615         This->state = hr;
616         return;
617     }
618
619     /* Declare the shader type and version */
620     put_dword(buffer, This->version);
621     write_constF(shader, buffer, TRUE);
622 }
623
624 static void end(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
625     put_dword(buffer, D3DSIO_END);
626 }
627
628 static DWORD map_vs_output(struct bc_writer *This, DWORD regnum, DWORD mask, DWORD *has_components) {
629     DWORD token = 0;
630     DWORD i;
631
632     *has_components = TRUE;
633     if(regnum == This->oPos_regnum) {
634         token |= (D3DSPR_RASTOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
635         token |= D3DSRO_POSITION & D3DSP_REGNUM_MASK; /* No shift */
636         return token;
637     }
638     if(regnum == This->oFog_regnum && mask == This->oFog_mask) {
639         token |= (D3DSPR_RASTOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
640         token |= D3DSRO_FOG & D3DSP_REGNUM_MASK; /* No shift */
641         token |= D3DSP_WRITEMASK_ALL;
642         *has_components = FALSE;
643         return token;
644     }
645     if(regnum == This->oPts_regnum && mask == This->oPts_mask) {
646         token |= (D3DSPR_RASTOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
647         token |= D3DSRO_POINT_SIZE & D3DSP_REGNUM_MASK; /* No shift */
648         token |= D3DSP_WRITEMASK_ALL;
649         *has_components = FALSE;
650         return token;
651     }
652     for(i = 0; i < 2; i++) {
653         if(regnum == This->oD_regnum[i]) {
654             token |= (D3DSPR_ATTROUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
655             token |= i & D3DSP_REGNUM_MASK; /* No shift */
656             return token;
657         }
658     }
659     for(i = 0; i < 8; i++) {
660         if(regnum == This->oT_regnum[i]) {
661             token |= (D3DSPR_TEXCRDOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
662             token |= i & D3DSP_REGNUM_MASK; /* No shift */
663             return token;
664         }
665     }
666
667     /* The varying must be undeclared - if an unsupported varying was declared,
668      * the vs_find_builtin_varyings function would have caught it and this code
669      * would not run */
670     WARN("Undeclared varying %u\n", regnum);
671     This->state = E_INVALIDARG;
672     return -1;
673 }
674
675 static void vs_12_dstreg(struct bc_writer *This, const struct shader_reg *reg,
676                          struct bytecode_buffer *buffer,
677                          DWORD shift, DWORD mod) {
678     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
679     DWORD has_wmask;
680
681     if(reg->rel_reg) {
682         WARN("Relative addressing not supported for destination registers\n");
683         This->state = E_INVALIDARG;
684         return;
685     }
686
687     switch(reg->type) {
688         case BWRITERSPR_OUTPUT:
689             token |= map_vs_output(This, reg->regnum, reg->writemask, &has_wmask);
690             break;
691
692         case BWRITERSPR_RASTOUT:
693         case BWRITERSPR_ATTROUT:
694             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
695             * but are unexpected. If we hit this path it might be due to an error.
696             */
697             FIXME("Unexpected register type %u\n", reg->type);
698             /* drop through */
699         case BWRITERSPR_INPUT:
700         case BWRITERSPR_TEMP:
701         case BWRITERSPR_CONST:
702             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
703             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
704             has_wmask = TRUE;
705             break;
706
707         case BWRITERSPR_ADDR:
708             if(reg->regnum != 0) {
709                 WARN("Only a0 exists\n");
710                 This->state = E_INVALIDARG;
711                 return;
712             }
713             token |= (D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
714             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
715             has_wmask = TRUE;
716             break;
717
718         case BWRITERSPR_PREDICATE:
719             if(This->version != BWRITERVS_VERSION(2, 1)){
720                 WARN("Predicate register is allowed only in vs_2_x\n");
721                 This->state = E_INVALIDARG;
722                 return;
723             }
724             if(reg->regnum != 0) {
725                 WARN("Only predicate register p0 exists\n");
726                 This->state = E_INVALIDARG;
727                 return;
728             }
729             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
730             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
731             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
732             has_wmask = TRUE;
733             break;
734
735         default:
736             WARN("Invalid register type for 1.x-2.x vertex shader\n");
737             This->state = E_INVALIDARG;
738             return;
739     }
740
741     /* strictly speaking there are no modifiers in vs_2_0 and vs_1_x, but they can be written
742      * into the bytecode and since the compiler doesn't do such checks write them
743      * (the checks are done by the undocumented shader validator)
744      */
745     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
746     token |= d3d9_dstmod(mod);
747
748     if(has_wmask) {
749         token |= d3d9_writemask(reg->writemask);
750     }
751     put_dword(buffer, token);
752 }
753
754 static void vs_1_x_srcreg(struct bc_writer *This, const struct shader_reg *reg,
755                           struct bytecode_buffer *buffer) {
756     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
757     DWORD has_swizzle;
758     DWORD component;
759
760     switch(reg->type) {
761         case BWRITERSPR_OUTPUT:
762             /* Map the swizzle to a writemask, the format expected
763                by map_vs_output
764              */
765             switch(reg->swizzle) {
766                 case BWRITERVS_SWIZZLE_X:
767                     component = BWRITERSP_WRITEMASK_0;
768                     break;
769                 case BWRITERVS_SWIZZLE_Y:
770                     component = BWRITERSP_WRITEMASK_1;
771                     break;
772                 case BWRITERVS_SWIZZLE_Z:
773                     component = BWRITERSP_WRITEMASK_2;
774                     break;
775                 case BWRITERVS_SWIZZLE_W:
776                     component = BWRITERSP_WRITEMASK_3;
777                     break;
778                 default:
779                     component = 0;
780             }
781             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
782             break;
783
784         case BWRITERSPR_RASTOUT:
785         case BWRITERSPR_ATTROUT:
786             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
787              * but are unexpected. If we hit this path it might be due to an error.
788              */
789             FIXME("Unexpected register type %u\n", reg->type);
790             /* drop through */
791         case BWRITERSPR_INPUT:
792         case BWRITERSPR_TEMP:
793         case BWRITERSPR_CONST:
794         case BWRITERSPR_ADDR:
795             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
796             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
797             if(reg->rel_reg) {
798                 if(reg->rel_reg->type != BWRITERSPR_ADDR ||
799                    reg->rel_reg->regnum != 0 ||
800                    reg->rel_reg->swizzle != BWRITERVS_SWIZZLE_X) {
801                     WARN("Relative addressing in vs_1_x is only allowed with a0.x\n");
802                     This->state = E_INVALIDARG;
803                     return;
804                 }
805                 token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
806             }
807             break;
808
809         default:
810             WARN("Invalid register type for 1.x vshader\n");
811             This->state = E_INVALIDARG;
812             return;
813     }
814
815     token |= d3d9_swizzle(reg->swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
816
817     token |= d3d9_srcmod(reg->srcmod);
818     put_dword(buffer, token);
819 }
820
821 static void write_srcregs(struct bc_writer *This, const struct instruction *instr,
822                           struct bytecode_buffer *buffer){
823     unsigned int i;
824     if(instr->has_predicate){
825         This->funcs->srcreg(This, &instr->predicate, buffer);
826     }
827     for(i = 0; i < instr->num_srcs; i++){
828         This->funcs->srcreg(This, &instr->src[i], buffer);
829     }
830 }
831
832 static DWORD map_ps13_temp(struct bc_writer *This, const struct shader_reg *reg) {
833     DWORD token = 0;
834     if(reg->regnum == T0_REG) {
835         token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
836         token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
837     } else if(reg->regnum == T1_REG) {
838         token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
839         token |= 1 & D3DSP_REGNUM_MASK; /* No shift */
840     } else if(reg->regnum == T2_REG) {
841         token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
842         token |= 2 & D3DSP_REGNUM_MASK; /* No shift */
843     } else if(reg->regnum == T3_REG) {
844         token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
845         token |= 3 & D3DSP_REGNUM_MASK; /* No shift */
846     } else {
847         token |= (D3DSPR_TEMP << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
848         token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
849     }
850     return token;
851 }
852
853 static DWORD map_ps_input(struct bc_writer *This,
854                           const struct shader_reg *reg) {
855     DWORD i, token = 0;
856     /* Map color interpolators */
857     for(i = 0; i < 2; i++) {
858         if(reg->regnum == This->v_regnum[i]) {
859             token |= (D3DSPR_INPUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
860             token |= i & D3DSP_REGNUM_MASK; /* No shift */
861             return token;
862         }
863     }
864     for(i = 0; i < 8; i++) {
865         if(reg->regnum == This->t_regnum[i]) {
866             token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
867             token |= i & D3DSP_REGNUM_MASK; /* No shift */
868             return token;
869         }
870     }
871
872     WARN("Invalid ps 1/2 varying\n");
873     This->state = E_INVALIDARG;
874     return token;
875 }
876
877 static void ps_1_0123_srcreg(struct bc_writer *This, const struct shader_reg *reg,
878                              struct bytecode_buffer *buffer) {
879     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
880     if(reg->rel_reg) {
881         WARN("Relative addressing not supported in <= ps_3_0\n");
882         This->state = E_INVALIDARG;
883         return;
884     }
885
886     switch(reg->type) {
887         case BWRITERSPR_INPUT:
888             token |= map_ps_input(This, reg);
889             break;
890
891             /* Take care about the texture temporaries. There's a problem: They aren't
892              * declared anywhere, so we can only hardcode the values that are used
893              * to map ps_1_3 shaders to the common shader structure
894              */
895         case BWRITERSPR_TEMP:
896             token |= map_ps13_temp(This, reg);
897             break;
898
899         case BWRITERSPR_CONST: /* Can be mapped 1:1 */
900             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
901             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
902             break;
903
904         default:
905             WARN("Invalid register type for <= ps_1_3 shader\n");
906             This->state = E_INVALIDARG;
907             return;
908     }
909
910     token |= d3d9_swizzle(reg->swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
911
912     if(reg->srcmod == BWRITERSPSM_DZ || reg->srcmod == BWRITERSPSM_DW ||
913        reg->srcmod == BWRITERSPSM_ABS || reg->srcmod == BWRITERSPSM_ABSNEG ||
914        reg->srcmod == BWRITERSPSM_NOT) {
915         WARN("Invalid source modifier %u for <= ps_1_3\n", reg->srcmod);
916         This->state = E_INVALIDARG;
917         return;
918     }
919     token |= d3d9_srcmod(reg->srcmod);
920     put_dword(buffer, token);
921 }
922
923 static void ps_1_0123_dstreg(struct bc_writer *This, const struct shader_reg *reg,
924                              struct bytecode_buffer *buffer,
925                              DWORD shift, DWORD mod) {
926     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
927
928     if(reg->rel_reg) {
929         WARN("Relative addressing not supported for destination registers\n");
930         This->state = E_INVALIDARG;
931         return;
932     }
933
934     switch(reg->type) {
935         case BWRITERSPR_TEMP:
936             token |= map_ps13_temp(This, reg);
937             break;
938
939         /* texkill uses the input register as a destination parameter */
940         case BWRITERSPR_INPUT:
941             token |= map_ps_input(This, reg);
942             break;
943
944         default:
945             WARN("Invalid dest register type for 1.x pshader\n");
946             This->state = E_INVALIDARG;
947             return;
948     }
949
950     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
951     token |= d3d9_dstmod(mod);
952
953     token |= d3d9_writemask(reg->writemask);
954     put_dword(buffer, token);
955 }
956
957 /* The length of an instruction consists of the destination register (if any),
958  * the number of source registers, the number of address registers used for
959  * indirect addressing, and optionally the predicate register
960  */
961 static DWORD instrlen(const struct instruction *instr, unsigned int srcs, unsigned int dsts) {
962     unsigned int i;
963     DWORD ret = srcs + dsts + (instr->has_predicate ? 1 : 0);
964
965     if(dsts){
966         if(instr->dst.rel_reg) ret++;
967     }
968     for(i = 0; i < srcs; i++) {
969         if(instr->src[i].rel_reg) ret++;
970     }
971     return ret;
972 }
973
974 static void sm_1_x_opcode(struct bc_writer *This,
975                           const struct instruction *instr,
976                           DWORD token, struct bytecode_buffer *buffer) {
977     /* In sm_1_x instruction length isn't encoded */
978     if(instr->coissue){
979         token |= D3DSI_COISSUE;
980     }
981     put_dword(buffer, token);
982 }
983
984 static void instr_handler(struct bc_writer *This,
985                           const struct instruction *instr,
986                           struct bytecode_buffer *buffer) {
987     DWORD token = d3d9_opcode(instr->opcode);
988
989     This->funcs->opcode(This, instr, token, buffer);
990     if(instr->has_dst) This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
991     write_srcregs(This, instr, buffer);
992 }
993
994 static const struct instr_handler_table vs_1_x_handlers[] = {
995     {BWRITERSIO_ADD,            instr_handler},
996     {BWRITERSIO_NOP,            instr_handler},
997     {BWRITERSIO_MOV,            instr_handler},
998     {BWRITERSIO_SUB,            instr_handler},
999     {BWRITERSIO_MAD,            instr_handler},
1000     {BWRITERSIO_MUL,            instr_handler},
1001     {BWRITERSIO_RCP,            instr_handler},
1002     {BWRITERSIO_RSQ,            instr_handler},
1003     {BWRITERSIO_DP3,            instr_handler},
1004     {BWRITERSIO_DP4,            instr_handler},
1005     {BWRITERSIO_MIN,            instr_handler},
1006     {BWRITERSIO_MAX,            instr_handler},
1007     {BWRITERSIO_SLT,            instr_handler},
1008     {BWRITERSIO_SGE,            instr_handler},
1009     {BWRITERSIO_EXP,            instr_handler},
1010     {BWRITERSIO_LOG,            instr_handler},
1011     {BWRITERSIO_EXPP,           instr_handler},
1012     {BWRITERSIO_LOGP,           instr_handler},
1013     {BWRITERSIO_DST,            instr_handler},
1014     {BWRITERSIO_FRC,            instr_handler},
1015     {BWRITERSIO_M4x4,           instr_handler},
1016     {BWRITERSIO_M4x3,           instr_handler},
1017     {BWRITERSIO_M3x4,           instr_handler},
1018     {BWRITERSIO_M3x3,           instr_handler},
1019     {BWRITERSIO_M3x2,           instr_handler},
1020     {BWRITERSIO_LIT,            instr_handler},
1021
1022     {BWRITERSIO_END,            NULL}, /* Sentinel value, it signals
1023                                           the end of the list */
1024 };
1025
1026 static const struct bytecode_backend vs_1_x_backend = {
1027     vs_1_x_header,
1028     end,
1029     vs_1_x_srcreg,
1030     vs_12_dstreg,
1031     sm_1_x_opcode,
1032     vs_1_x_handlers
1033 };
1034
1035 static void instr_ps_1_0123_texld(struct bc_writer *This,
1036                                   const struct instruction *instr,
1037                                   struct bytecode_buffer *buffer) {
1038     DWORD idx;
1039     struct shader_reg reg;
1040     DWORD swizzlemask;
1041
1042     if(instr->src[1].type != BWRITERSPR_SAMPLER ||
1043        instr->src[1].regnum > 3) {
1044         WARN("Unsupported sampler type %u regnum %u\n",
1045              instr->src[1].type, instr->src[1].regnum);
1046         This->state = E_INVALIDARG;
1047         return;
1048     } else if(instr->dst.type != BWRITERSPR_TEMP) {
1049         WARN("Can only sample into a temp register\n");
1050         This->state = E_INVALIDARG;
1051         return;
1052     }
1053
1054     idx = instr->src[1].regnum;
1055     if((idx == 0 && instr->dst.regnum != T0_REG) ||
1056        (idx == 1 && instr->dst.regnum != T1_REG) ||
1057        (idx == 2 && instr->dst.regnum != T2_REG) ||
1058        (idx == 3 && instr->dst.regnum != T3_REG)) {
1059         WARN("Sampling from sampler s%u to register r%u is not possible in ps_1_x\n",
1060              idx, instr->dst.regnum);
1061         This->state = E_INVALIDARG;
1062         return;
1063     }
1064     if(instr->src[0].type == BWRITERSPR_INPUT) {
1065         /* A simple non-dependent read tex instruction */
1066         if(instr->src[0].regnum != This->t_regnum[idx]) {
1067             WARN("Cannot sample from s%u with texture address data from interpolator %u\n",
1068                  idx, instr->src[0].regnum);
1069             This->state = E_INVALIDARG;
1070             return;
1071         }
1072         This->funcs->opcode(This, instr, D3DSIO_TEX & D3DSI_OPCODE_MASK, buffer);
1073
1074         /* map the temp dstreg to the ps_1_3 texture temporary register */
1075         This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1076     } else if(instr->src[0].type == BWRITERSPR_TEMP) {
1077
1078         swizzlemask = (3 << BWRITERVS_SWIZZLE_SHIFT) |
1079             (3 << (BWRITERVS_SWIZZLE_SHIFT + 2)) |
1080             (3 << (BWRITERVS_SWIZZLE_SHIFT + 4));
1081         if((instr->src[0].swizzle & swizzlemask) == (BWRITERVS_X_X | BWRITERVS_Y_Y | BWRITERVS_Z_Z)) {
1082             TRACE("writing texreg2rgb\n");
1083             This->funcs->opcode(This, instr, D3DSIO_TEXREG2RGB & D3DSI_OPCODE_MASK, buffer);
1084         } else if(instr->src[0].swizzle == (BWRITERVS_X_W | BWRITERVS_Y_X | BWRITERVS_Z_X | BWRITERVS_W_X)) {
1085             TRACE("writing texreg2ar\n");
1086             This->funcs->opcode(This, instr, D3DSIO_TEXREG2AR & D3DSI_OPCODE_MASK, buffer);
1087         } else if(instr->src[0].swizzle == (BWRITERVS_X_Y | BWRITERVS_Y_Z | BWRITERVS_Z_Z | BWRITERVS_W_Z)) {
1088             TRACE("writing texreg2gb\n");
1089             This->funcs->opcode(This, instr, D3DSIO_TEXREG2GB & D3DSI_OPCODE_MASK, buffer);
1090         } else {
1091             WARN("Unsupported src addr swizzle in dependent texld: 0x%08x\n", instr->src[0].swizzle);
1092             This->state = E_INVALIDARG;
1093             return;
1094         }
1095
1096         /* Dst and src reg can be mapped normally. Both registers are temporary registers in the
1097          * source shader and have to be mapped to the temporary form of the texture registers. However,
1098          * the src reg doesn't have a swizzle
1099          */
1100         This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1101         reg = instr->src[0];
1102         reg.swizzle = BWRITERVS_NOSWIZZLE;
1103         This->funcs->srcreg(This, &reg, buffer);
1104     } else {
1105         WARN("Invalid address data source register\n");
1106         This->state = E_INVALIDARG;
1107         return;
1108     }
1109 }
1110
1111 static void instr_ps_1_0123_mov(struct bc_writer *This,
1112                                 const struct instruction *instr,
1113                                 struct bytecode_buffer *buffer) {
1114     DWORD token = D3DSIO_MOV & D3DSI_OPCODE_MASK;
1115
1116     if(instr->dst.type == BWRITERSPR_TEMP && instr->src[0].type == BWRITERSPR_INPUT) {
1117         if((instr->dst.regnum == T0_REG && instr->src[0].regnum == This->t_regnum[0]) ||
1118            (instr->dst.regnum == T1_REG && instr->src[0].regnum == This->t_regnum[1]) ||
1119            (instr->dst.regnum == T2_REG && instr->src[0].regnum == This->t_regnum[2]) ||
1120            (instr->dst.regnum == T3_REG && instr->src[0].regnum == This->t_regnum[3])) {
1121             if(instr->dstmod & BWRITERSPDM_SATURATE) {
1122                 This->funcs->opcode(This, instr, D3DSIO_TEXCOORD & D3DSI_OPCODE_MASK, buffer);
1123                 /* Remove the SATURATE flag, it's implicit to the instruction */
1124                 This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod & (~BWRITERSPDM_SATURATE));
1125                 return;
1126             } else {
1127                 WARN("A varying -> temp copy is only supported with the SATURATE modifier in <=ps_1_3\n");
1128                 This->state = E_INVALIDARG;
1129                 return;
1130             }
1131         } else if(instr->src[0].regnum == This->v_regnum[0] ||
1132                   instr->src[0].regnum == This->v_regnum[1]) {
1133             /* Handled by the normal mov below. Just drop out of the if condition */
1134         } else {
1135             WARN("Unsupported varying -> temp mov in <= ps_1_3\n");
1136             This->state = E_INVALIDARG;
1137             return;
1138         }
1139     }
1140
1141     This->funcs->opcode(This, instr, token, buffer);
1142     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1143     This->funcs->srcreg(This, &instr->src[0], buffer);
1144 }
1145
1146 static const struct instr_handler_table ps_1_0123_handlers[] = {
1147     {BWRITERSIO_ADD,            instr_handler},
1148     {BWRITERSIO_NOP,            instr_handler},
1149     {BWRITERSIO_MOV,            instr_ps_1_0123_mov},
1150     {BWRITERSIO_SUB,            instr_handler},
1151     {BWRITERSIO_MAD,            instr_handler},
1152     {BWRITERSIO_MUL,            instr_handler},
1153     {BWRITERSIO_DP3,            instr_handler},
1154     {BWRITERSIO_DP4,            instr_handler},
1155     {BWRITERSIO_LRP,            instr_handler},
1156
1157     /* pshader instructions */
1158     {BWRITERSIO_CND,            instr_handler},
1159     {BWRITERSIO_CMP,            instr_handler},
1160     {BWRITERSIO_TEXKILL,        instr_handler},
1161     {BWRITERSIO_TEX,            instr_ps_1_0123_texld},
1162     {BWRITERSIO_TEXBEM,         instr_handler},
1163     {BWRITERSIO_TEXBEML,        instr_handler},
1164     {BWRITERSIO_TEXM3x2PAD,     instr_handler},
1165     {BWRITERSIO_TEXM3x3PAD,     instr_handler},
1166     {BWRITERSIO_TEXM3x3SPEC,    instr_handler},
1167     {BWRITERSIO_TEXM3x3VSPEC,   instr_handler},
1168     {BWRITERSIO_TEXM3x3TEX,     instr_handler},
1169     {BWRITERSIO_TEXM3x3,        instr_handler},
1170     {BWRITERSIO_TEXM3x2DEPTH,   instr_handler},
1171     {BWRITERSIO_TEXM3x2TEX,     instr_handler},
1172     {BWRITERSIO_TEXDP3,         instr_handler},
1173     {BWRITERSIO_TEXDP3TEX,      instr_handler},
1174     {BWRITERSIO_END,            NULL},
1175 };
1176
1177 static const struct bytecode_backend ps_1_0123_backend = {
1178     ps_1_x_header,
1179     end,
1180     ps_1_0123_srcreg,
1181     ps_1_0123_dstreg,
1182     sm_1_x_opcode,
1183     ps_1_0123_handlers
1184 };
1185
1186 static void ps_1_4_srcreg(struct bc_writer *This, const struct shader_reg *reg,
1187                           struct bytecode_buffer *buffer) {
1188     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1189     if(reg->rel_reg) {
1190         WARN("Relative addressing not supported in <= ps_3_0\n");
1191         This->state = E_INVALIDARG;
1192         return;
1193     }
1194
1195     switch(reg->type) {
1196         case BWRITERSPR_INPUT:
1197             token |= map_ps_input(This, reg);
1198             break;
1199
1200         /* Can be mapped 1:1 */
1201         case BWRITERSPR_TEMP:
1202         case BWRITERSPR_CONST:
1203             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1204             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1205             break;
1206
1207         default:
1208             WARN("Invalid register type for ps_1_4 shader\n");
1209             This->state = E_INVALIDARG;
1210             return;
1211     }
1212
1213     token |= d3d9_swizzle(reg->swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1214
1215     if(reg->srcmod == BWRITERSPSM_ABS || reg->srcmod == BWRITERSPSM_ABSNEG ||
1216        reg->srcmod == BWRITERSPSM_NOT) {
1217         WARN("Invalid source modifier %u for ps_1_4\n", reg->srcmod);
1218         This->state = E_INVALIDARG;
1219         return;
1220     }
1221     token |= d3d9_srcmod(reg->srcmod);
1222     put_dword(buffer, token);
1223 }
1224
1225 static void ps_1_4_dstreg(struct bc_writer *This, const struct shader_reg *reg,
1226                           struct bytecode_buffer *buffer,
1227                           DWORD shift, DWORD mod) {
1228     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1229
1230     if(reg->rel_reg) {
1231         WARN("Relative addressing not supported for destination registers\n");
1232         This->state = E_INVALIDARG;
1233         return;
1234     }
1235
1236     switch(reg->type) {
1237         case BWRITERSPR_TEMP: /* 1:1 mapping */
1238             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1239             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1240             break;
1241
1242         /* For texkill */
1243         case BWRITERSPR_INPUT:
1244             token |= map_ps_input(This, reg);
1245             break;
1246
1247         default:
1248             WARN("Invalid dest register type for 1.x pshader\n");
1249             This->state = E_INVALIDARG;
1250             return;
1251     }
1252
1253     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1254     token |= d3d9_dstmod(mod);
1255
1256     token |= d3d9_writemask(reg->writemask);
1257     put_dword(buffer, token);
1258 }
1259
1260 static void instr_ps_1_4_mov(struct bc_writer *This,
1261                              const struct instruction *instr,
1262                              struct bytecode_buffer *buffer) {
1263     DWORD token = D3DSIO_MOV & D3DSI_OPCODE_MASK;
1264
1265     if(instr->dst.type == BWRITERSPR_TEMP && instr->src[0].type == BWRITERSPR_INPUT) {
1266         if(instr->src[0].regnum == This->t_regnum[0] ||
1267            instr->src[0].regnum == This->t_regnum[1] ||
1268            instr->src[0].regnum == This->t_regnum[2] ||
1269            instr->src[0].regnum == This->t_regnum[3] ||
1270            instr->src[0].regnum == This->t_regnum[4] ||
1271            instr->src[0].regnum == This->t_regnum[5]) {
1272             /* Similar to a regular mov, but a different opcode */
1273             token = D3DSIO_TEXCOORD & D3DSI_OPCODE_MASK;
1274         } else if(instr->src[0].regnum == This->v_regnum[0] ||
1275                   instr->src[0].regnum == This->v_regnum[1]) {
1276             /* Handled by the normal mov below. Just drop out of the if condition */
1277         } else {
1278             WARN("Unsupported varying -> temp mov in ps_1_4\n");
1279             This->state = E_INVALIDARG;
1280             return;
1281         }
1282     }
1283
1284     This->funcs->opcode(This, instr, token, buffer);
1285     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1286     This->funcs->srcreg(This, &instr->src[0], buffer);
1287 }
1288
1289 static void instr_ps_1_4_texld(struct bc_writer *This,
1290                                const struct instruction *instr,
1291                                struct bytecode_buffer *buffer) {
1292     if(instr->src[1].type != BWRITERSPR_SAMPLER ||
1293        instr->src[1].regnum > 5) {
1294         WARN("Unsupported sampler type %u regnum %u\n",
1295              instr->src[1].type, instr->src[1].regnum);
1296         This->state = E_INVALIDARG;
1297         return;
1298     } else if(instr->dst.type != BWRITERSPR_TEMP) {
1299         WARN("Can only sample into a temp register\n");
1300         This->state = E_INVALIDARG;
1301         return;
1302     }
1303
1304     if(instr->src[1].regnum != instr->dst.regnum) {
1305         WARN("Sampling from sampler s%u to register r%u is not possible in ps_1_4\n",
1306              instr->src[1].regnum, instr->dst.regnum);
1307         This->state = E_INVALIDARG;
1308         return;
1309     }
1310
1311     This->funcs->opcode(This, instr, D3DSIO_TEX & D3DSI_OPCODE_MASK, buffer);
1312     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1313     This->funcs->srcreg(This, &instr->src[0], buffer);
1314 }
1315
1316 static const struct instr_handler_table ps_1_4_handlers[] = {
1317     {BWRITERSIO_ADD,            instr_handler},
1318     {BWRITERSIO_NOP,            instr_handler},
1319     {BWRITERSIO_MOV,            instr_ps_1_4_mov},
1320     {BWRITERSIO_SUB,            instr_handler},
1321     {BWRITERSIO_MAD,            instr_handler},
1322     {BWRITERSIO_MUL,            instr_handler},
1323     {BWRITERSIO_DP3,            instr_handler},
1324     {BWRITERSIO_DP4,            instr_handler},
1325     {BWRITERSIO_LRP,            instr_handler},
1326
1327     /* pshader instructions */
1328     {BWRITERSIO_CND,            instr_handler},
1329     {BWRITERSIO_CMP,            instr_handler},
1330     {BWRITERSIO_TEXKILL,        instr_handler},
1331     {BWRITERSIO_TEX,            instr_ps_1_4_texld},
1332     {BWRITERSIO_TEXDEPTH,       instr_handler},
1333     {BWRITERSIO_BEM,            instr_handler},
1334
1335     {BWRITERSIO_PHASE,          instr_handler},
1336     {BWRITERSIO_END,            NULL},
1337 };
1338
1339 static const struct bytecode_backend ps_1_4_backend = {
1340     ps_1_4_header,
1341     end,
1342     ps_1_4_srcreg,
1343     ps_1_4_dstreg,
1344     sm_1_x_opcode,
1345     ps_1_4_handlers
1346 };
1347
1348 static void write_constB(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
1349     write_const(shader->constB, shader->num_cb, D3DSIO_DEFB, D3DSPR_CONSTBOOL, buffer, len);
1350 }
1351
1352 static void write_constI(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
1353     write_const(shader->constI, shader->num_ci, D3DSIO_DEFI, D3DSPR_CONSTINT, buffer, len);
1354 }
1355
1356 static void vs_2_header(struct bc_writer *This,
1357                         const struct bwriter_shader *shader,
1358                         struct bytecode_buffer *buffer) {
1359     HRESULT hr;
1360
1361     hr = vs_find_builtin_varyings(This, shader);
1362     if(FAILED(hr)) {
1363         This->state = hr;
1364         return;
1365     }
1366
1367     /* Declare the shader type and version */
1368     put_dword(buffer, This->version);
1369
1370     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1371     write_constF(shader, buffer, TRUE);
1372     write_constB(shader, buffer, TRUE);
1373     write_constI(shader, buffer, TRUE);
1374     return;
1375 }
1376
1377 static void vs_2_srcreg(struct bc_writer *This,
1378                         const struct shader_reg *reg,
1379                         struct bytecode_buffer *buffer) {
1380     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1381     DWORD has_swizzle;
1382     DWORD component;
1383     DWORD d3d9reg;
1384
1385     switch(reg->type) {
1386         case BWRITERSPR_OUTPUT:
1387             /* Map the swizzle to a writemask, the format expected
1388                by map_vs_output
1389              */
1390             switch(reg->swizzle) {
1391                 case BWRITERVS_SWIZZLE_X:
1392                     component = BWRITERSP_WRITEMASK_0;
1393                     break;
1394                 case BWRITERVS_SWIZZLE_Y:
1395                     component = BWRITERSP_WRITEMASK_1;
1396                     break;
1397                 case BWRITERVS_SWIZZLE_Z:
1398                     component = BWRITERSP_WRITEMASK_2;
1399                     break;
1400                 case BWRITERVS_SWIZZLE_W:
1401                     component = BWRITERSP_WRITEMASK_3;
1402                     break;
1403                 default:
1404                     component = 0;
1405             }
1406             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
1407             break;
1408
1409         case BWRITERSPR_RASTOUT:
1410         case BWRITERSPR_ATTROUT:
1411             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
1412              * but are unexpected. If we hit this path it might be due to an error.
1413              */
1414             FIXME("Unexpected register type %u\n", reg->type);
1415             /* drop through */
1416         case BWRITERSPR_INPUT:
1417         case BWRITERSPR_TEMP:
1418         case BWRITERSPR_CONST:
1419         case BWRITERSPR_ADDR:
1420         case BWRITERSPR_CONSTINT:
1421         case BWRITERSPR_CONSTBOOL:
1422         case BWRITERSPR_LABEL:
1423             d3d9reg = d3d9_register(reg->type);
1424             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1425             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1426             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1427             break;
1428
1429         case BWRITERSPR_LOOP:
1430             if(reg->regnum != 0) {
1431                 WARN("Only regnum 0 is supported for the loop index register in vs_2_0\n");
1432                 This->state = E_INVALIDARG;
1433                 return;
1434             }
1435             token |= (D3DSPR_LOOP << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1436             token |= (D3DSPR_LOOP << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1437             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
1438             break;
1439
1440         case BWRITERSPR_PREDICATE:
1441             if(This->version != BWRITERVS_VERSION(2, 1)){
1442                 WARN("Predicate register is allowed only in vs_2_x\n");
1443                 This->state = E_INVALIDARG;
1444                 return;
1445             }
1446             if(reg->regnum > 0) {
1447                 WARN("Only predicate register 0 is supported\n");
1448                 This->state = E_INVALIDARG;
1449                 return;
1450             }
1451             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1452             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1453             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
1454
1455             break;
1456
1457         default:
1458             WARN("Invalid register type for 2.0 vshader\n");
1459             This->state = E_INVALIDARG;
1460             return;
1461     }
1462
1463     token |= d3d9_swizzle(reg->swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1464
1465     token |= d3d9_srcmod(reg->srcmod);
1466
1467     if(reg->rel_reg)
1468         token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1469
1470     put_dword(buffer, token);
1471
1472     /* vs_2_0 and newer write the register containing the index explicitly in the
1473      * binary code
1474      */
1475     if(token & D3DVS_ADDRMODE_RELATIVE)
1476         vs_2_srcreg(This, reg->rel_reg, buffer);
1477 }
1478
1479 static void sm_2_opcode(struct bc_writer *This,
1480                         const struct instruction *instr,
1481                         DWORD token, struct bytecode_buffer *buffer) {
1482     /* From sm 2 onwards instruction length is encoded in the opcode field */
1483     int dsts = instr->has_dst ? 1 : 0;
1484     token |= instrlen(instr, instr->num_srcs, dsts) << D3DSI_INSTLENGTH_SHIFT;
1485     if(instr->comptype)
1486         token |= (d3d9_comparetype(instr->comptype) << 16) & (0xf << 16);
1487     if(instr->has_predicate)
1488         token |= D3DSHADER_INSTRUCTION_PREDICATED;
1489     put_dword(buffer,token);
1490 }
1491
1492 static const struct instr_handler_table vs_2_0_handlers[] = {
1493     {BWRITERSIO_ADD,            instr_handler},
1494     {BWRITERSIO_NOP,            instr_handler},
1495     {BWRITERSIO_MOV,            instr_handler},
1496     {BWRITERSIO_SUB,            instr_handler},
1497     {BWRITERSIO_MAD,            instr_handler},
1498     {BWRITERSIO_MUL,            instr_handler},
1499     {BWRITERSIO_RCP,            instr_handler},
1500     {BWRITERSIO_RSQ,            instr_handler},
1501     {BWRITERSIO_DP3,            instr_handler},
1502     {BWRITERSIO_DP4,            instr_handler},
1503     {BWRITERSIO_MIN,            instr_handler},
1504     {BWRITERSIO_MAX,            instr_handler},
1505     {BWRITERSIO_SLT,            instr_handler},
1506     {BWRITERSIO_SGE,            instr_handler},
1507     {BWRITERSIO_ABS,            instr_handler},
1508     {BWRITERSIO_EXP,            instr_handler},
1509     {BWRITERSIO_LOG,            instr_handler},
1510     {BWRITERSIO_EXPP,           instr_handler},
1511     {BWRITERSIO_LOGP,           instr_handler},
1512     {BWRITERSIO_DST,            instr_handler},
1513     {BWRITERSIO_LRP,            instr_handler},
1514     {BWRITERSIO_FRC,            instr_handler},
1515     {BWRITERSIO_CRS,            instr_handler},
1516     {BWRITERSIO_SGN,            instr_handler},
1517     {BWRITERSIO_NRM,            instr_handler},
1518     {BWRITERSIO_SINCOS,         instr_handler},
1519     {BWRITERSIO_M4x4,           instr_handler},
1520     {BWRITERSIO_M4x3,           instr_handler},
1521     {BWRITERSIO_M3x4,           instr_handler},
1522     {BWRITERSIO_M3x3,           instr_handler},
1523     {BWRITERSIO_M3x2,           instr_handler},
1524     {BWRITERSIO_LIT,            instr_handler},
1525     {BWRITERSIO_POW,            instr_handler},
1526     {BWRITERSIO_MOVA,           instr_handler},
1527
1528     {BWRITERSIO_CALL,           instr_handler},
1529     {BWRITERSIO_CALLNZ,         instr_handler},
1530     {BWRITERSIO_REP,            instr_handler},
1531     {BWRITERSIO_ENDREP,         instr_handler},
1532     {BWRITERSIO_IF,             instr_handler},
1533     {BWRITERSIO_LABEL,          instr_handler},
1534     {BWRITERSIO_ELSE,           instr_handler},
1535     {BWRITERSIO_ENDIF,          instr_handler},
1536     {BWRITERSIO_LOOP,           instr_handler},
1537     {BWRITERSIO_RET,            instr_handler},
1538     {BWRITERSIO_ENDLOOP,        instr_handler},
1539
1540     {BWRITERSIO_END,            NULL},
1541 };
1542
1543 static const struct bytecode_backend vs_2_0_backend = {
1544     vs_2_header,
1545     end,
1546     vs_2_srcreg,
1547     vs_12_dstreg,
1548     sm_2_opcode,
1549     vs_2_0_handlers
1550 };
1551
1552 static const struct instr_handler_table vs_2_x_handlers[] = {
1553     {BWRITERSIO_ADD,            instr_handler},
1554     {BWRITERSIO_NOP,            instr_handler},
1555     {BWRITERSIO_MOV,            instr_handler},
1556     {BWRITERSIO_SUB,            instr_handler},
1557     {BWRITERSIO_MAD,            instr_handler},
1558     {BWRITERSIO_MUL,            instr_handler},
1559     {BWRITERSIO_RCP,            instr_handler},
1560     {BWRITERSIO_RSQ,            instr_handler},
1561     {BWRITERSIO_DP3,            instr_handler},
1562     {BWRITERSIO_DP4,            instr_handler},
1563     {BWRITERSIO_MIN,            instr_handler},
1564     {BWRITERSIO_MAX,            instr_handler},
1565     {BWRITERSIO_SLT,            instr_handler},
1566     {BWRITERSIO_SGE,            instr_handler},
1567     {BWRITERSIO_ABS,            instr_handler},
1568     {BWRITERSIO_EXP,            instr_handler},
1569     {BWRITERSIO_LOG,            instr_handler},
1570     {BWRITERSIO_EXPP,           instr_handler},
1571     {BWRITERSIO_LOGP,           instr_handler},
1572     {BWRITERSIO_DST,            instr_handler},
1573     {BWRITERSIO_LRP,            instr_handler},
1574     {BWRITERSIO_FRC,            instr_handler},
1575     {BWRITERSIO_CRS,            instr_handler},
1576     {BWRITERSIO_SGN,            instr_handler},
1577     {BWRITERSIO_NRM,            instr_handler},
1578     {BWRITERSIO_SINCOS,         instr_handler},
1579     {BWRITERSIO_M4x4,           instr_handler},
1580     {BWRITERSIO_M4x3,           instr_handler},
1581     {BWRITERSIO_M3x4,           instr_handler},
1582     {BWRITERSIO_M3x3,           instr_handler},
1583     {BWRITERSIO_M3x2,           instr_handler},
1584     {BWRITERSIO_LIT,            instr_handler},
1585     {BWRITERSIO_POW,            instr_handler},
1586     {BWRITERSIO_MOVA,           instr_handler},
1587
1588     {BWRITERSIO_CALL,           instr_handler},
1589     {BWRITERSIO_CALLNZ,         instr_handler},
1590     {BWRITERSIO_REP,            instr_handler},
1591     {BWRITERSIO_ENDREP,         instr_handler},
1592     {BWRITERSIO_IF,             instr_handler},
1593     {BWRITERSIO_LABEL,          instr_handler},
1594     {BWRITERSIO_IFC,            instr_handler},
1595     {BWRITERSIO_ELSE,           instr_handler},
1596     {BWRITERSIO_ENDIF,          instr_handler},
1597     {BWRITERSIO_BREAK,          instr_handler},
1598     {BWRITERSIO_BREAKC,         instr_handler},
1599     {BWRITERSIO_LOOP,           instr_handler},
1600     {BWRITERSIO_RET,            instr_handler},
1601     {BWRITERSIO_ENDLOOP,        instr_handler},
1602
1603     {BWRITERSIO_SETP,           instr_handler},
1604     {BWRITERSIO_BREAKP,         instr_handler},
1605
1606     {BWRITERSIO_END,            NULL},
1607 };
1608
1609 static const struct bytecode_backend vs_2_x_backend = {
1610     vs_2_header,
1611     end,
1612     vs_2_srcreg,
1613     vs_12_dstreg,
1614     sm_2_opcode,
1615     vs_2_x_handlers
1616 };
1617
1618 static void write_samplers(const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1619     DWORD i;
1620     DWORD instr_dcl = D3DSIO_DCL | (2 << D3DSI_INSTLENGTH_SHIFT);
1621     DWORD token;
1622     const DWORD reg = (1<<31) |
1623         ((D3DSPR_SAMPLER << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
1624         ((D3DSPR_SAMPLER << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
1625         D3DSP_WRITEMASK_ALL;
1626
1627     for(i = 0; i < shader->num_samplers; i++) {
1628         /* Write the DCL instruction */
1629         put_dword(buffer, instr_dcl);
1630         token = (1<<31);
1631         /* Already shifted */
1632         token |= (d3d9_sampler(shader->samplers[i].type)) & D3DSP_TEXTURETYPE_MASK;
1633         put_dword(buffer, token);
1634         token = reg | (shader->samplers[i].regnum & D3DSP_REGNUM_MASK);
1635         token |= d3d9_dstmod(shader->samplers[i].mod);
1636         put_dword(buffer, token);
1637     }
1638 }
1639
1640 static void ps_2_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1641     HRESULT hr = find_ps_builtin_semantics(This, shader, 8);
1642     if(FAILED(hr)) {
1643         This->state = hr;
1644         return;
1645     }
1646
1647     /* Declare the shader type and version */
1648     put_dword(buffer, This->version);
1649     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1650     write_samplers(shader, buffer);
1651     write_constF(shader, buffer, TRUE);
1652     write_constB(shader, buffer, TRUE);
1653     write_constI(shader, buffer, TRUE);
1654 }
1655
1656 static void ps_2_srcreg(struct bc_writer *This,
1657                         const struct shader_reg *reg,
1658                         struct bytecode_buffer *buffer) {
1659     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1660     DWORD d3d9reg;
1661     if(reg->rel_reg) {
1662         WARN("Relative addressing not supported in <= ps_3_0\n");
1663         This->state = E_INVALIDARG;
1664         return;
1665     }
1666
1667     switch(reg->type) {
1668         case BWRITERSPR_INPUT:
1669             token |= map_ps_input(This, reg);
1670             break;
1671
1672             /* Can be mapped 1:1 */
1673         case BWRITERSPR_TEMP:
1674         case BWRITERSPR_CONST:
1675         case BWRITERSPR_COLOROUT:
1676         case BWRITERSPR_CONSTBOOL:
1677         case BWRITERSPR_CONSTINT:
1678         case BWRITERSPR_SAMPLER:
1679         case BWRITERSPR_LABEL:
1680         case BWRITERSPR_DEPTHOUT:
1681             d3d9reg = d3d9_register(reg->type);
1682             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1683             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1684             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1685             break;
1686
1687         case BWRITERSPR_PREDICATE:
1688             if(This->version != BWRITERPS_VERSION(2, 1)){
1689                 WARN("Predicate register not supported in ps_2_0\n");
1690                 This->state = E_INVALIDARG;
1691             }
1692             if(reg->regnum) {
1693                 WARN("Predicate register with regnum %u not supported\n",
1694                      reg->regnum);
1695                 This->state = E_INVALIDARG;
1696             }
1697             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1698             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1699             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
1700             break;
1701
1702         default:
1703             WARN("Invalid register type for ps_2_0 shader\n");
1704             This->state = E_INVALIDARG;
1705             return;
1706     }
1707
1708     token |= d3d9_swizzle(reg->swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1709
1710     token |= d3d9_srcmod(reg->srcmod);
1711     put_dword(buffer, token);
1712 }
1713
1714 static void ps_2_0_dstreg(struct bc_writer *This,
1715                           const struct shader_reg *reg,
1716                           struct bytecode_buffer *buffer,
1717                           DWORD shift, DWORD mod) {
1718     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1719     DWORD d3d9reg;
1720
1721     if(reg->rel_reg) {
1722         WARN("Relative addressing not supported for destination registers\n");
1723         This->state = E_INVALIDARG;
1724         return;
1725     }
1726
1727     switch(reg->type) {
1728         case BWRITERSPR_TEMP: /* 1:1 mapping */
1729         case BWRITERSPR_COLOROUT:
1730         case BWRITERSPR_DEPTHOUT:
1731             d3d9reg = d3d9_register(reg->type);
1732             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1733             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1734             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1735             break;
1736
1737         case BWRITERSPR_PREDICATE:
1738             if(This->version != BWRITERPS_VERSION(2, 1)){
1739                 WARN("Predicate register not supported in ps_2_0\n");
1740                 This->state = E_INVALIDARG;
1741             }
1742             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1743             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1744             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1745             break;
1746
1747         /* texkill uses the input register as a destination parameter */
1748         case BWRITERSPR_INPUT:
1749             token |= map_ps_input(This, reg);
1750             break;
1751
1752         default:
1753             WARN("Invalid dest register type for 2.x pshader\n");
1754             This->state = E_INVALIDARG;
1755             return;
1756     }
1757
1758     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1759     token |= d3d9_dstmod(mod);
1760
1761     token |= d3d9_writemask(reg->writemask);
1762     put_dword(buffer, token);
1763 }
1764
1765 static const struct instr_handler_table ps_2_0_handlers[] = {
1766     {BWRITERSIO_ADD,            instr_handler},
1767     {BWRITERSIO_NOP,            instr_handler},
1768     {BWRITERSIO_MOV,            instr_handler},
1769     {BWRITERSIO_SUB,            instr_handler},
1770     {BWRITERSIO_MAD,            instr_handler},
1771     {BWRITERSIO_MUL,            instr_handler},
1772     {BWRITERSIO_RCP,            instr_handler},
1773     {BWRITERSIO_RSQ,            instr_handler},
1774     {BWRITERSIO_DP3,            instr_handler},
1775     {BWRITERSIO_DP4,            instr_handler},
1776     {BWRITERSIO_MIN,            instr_handler},
1777     {BWRITERSIO_MAX,            instr_handler},
1778     {BWRITERSIO_ABS,            instr_handler},
1779     {BWRITERSIO_EXP,            instr_handler},
1780     {BWRITERSIO_LOG,            instr_handler},
1781     {BWRITERSIO_EXPP,           instr_handler},
1782     {BWRITERSIO_LOGP,           instr_handler},
1783     {BWRITERSIO_LRP,            instr_handler},
1784     {BWRITERSIO_FRC,            instr_handler},
1785     {BWRITERSIO_CRS,            instr_handler},
1786     {BWRITERSIO_NRM,            instr_handler},
1787     {BWRITERSIO_SINCOS,         instr_handler},
1788     {BWRITERSIO_M4x4,           instr_handler},
1789     {BWRITERSIO_M4x3,           instr_handler},
1790     {BWRITERSIO_M3x4,           instr_handler},
1791     {BWRITERSIO_M3x3,           instr_handler},
1792     {BWRITERSIO_M3x2,           instr_handler},
1793     {BWRITERSIO_POW,            instr_handler},
1794     {BWRITERSIO_DP2ADD,         instr_handler},
1795     {BWRITERSIO_CMP,            instr_handler},
1796
1797     {BWRITERSIO_TEX,            instr_handler},
1798     {BWRITERSIO_TEXLDP,         instr_handler},
1799     {BWRITERSIO_TEXLDB,         instr_handler},
1800     {BWRITERSIO_TEXKILL,        instr_handler},
1801
1802     {BWRITERSIO_END,            NULL},
1803 };
1804
1805 static const struct bytecode_backend ps_2_0_backend = {
1806     ps_2_header,
1807     end,
1808     ps_2_srcreg,
1809     ps_2_0_dstreg,
1810     sm_2_opcode,
1811     ps_2_0_handlers
1812 };
1813
1814 static const struct instr_handler_table ps_2_x_handlers[] = {
1815     {BWRITERSIO_ADD,            instr_handler},
1816     {BWRITERSIO_NOP,            instr_handler},
1817     {BWRITERSIO_MOV,            instr_handler},
1818     {BWRITERSIO_SUB,            instr_handler},
1819     {BWRITERSIO_MAD,            instr_handler},
1820     {BWRITERSIO_MUL,            instr_handler},
1821     {BWRITERSIO_RCP,            instr_handler},
1822     {BWRITERSIO_RSQ,            instr_handler},
1823     {BWRITERSIO_DP3,            instr_handler},
1824     {BWRITERSIO_DP4,            instr_handler},
1825     {BWRITERSIO_MIN,            instr_handler},
1826     {BWRITERSIO_MAX,            instr_handler},
1827     {BWRITERSIO_ABS,            instr_handler},
1828     {BWRITERSIO_EXP,            instr_handler},
1829     {BWRITERSIO_LOG,            instr_handler},
1830     {BWRITERSIO_EXPP,           instr_handler},
1831     {BWRITERSIO_LOGP,           instr_handler},
1832     {BWRITERSIO_LRP,            instr_handler},
1833     {BWRITERSIO_FRC,            instr_handler},
1834     {BWRITERSIO_CRS,            instr_handler},
1835     {BWRITERSIO_NRM,            instr_handler},
1836     {BWRITERSIO_SINCOS,         instr_handler},
1837     {BWRITERSIO_M4x4,           instr_handler},
1838     {BWRITERSIO_M4x3,           instr_handler},
1839     {BWRITERSIO_M3x4,           instr_handler},
1840     {BWRITERSIO_M3x3,           instr_handler},
1841     {BWRITERSIO_M3x2,           instr_handler},
1842     {BWRITERSIO_POW,            instr_handler},
1843     {BWRITERSIO_DP2ADD,         instr_handler},
1844     {BWRITERSIO_CMP,            instr_handler},
1845
1846     {BWRITERSIO_CALL,           instr_handler},
1847     {BWRITERSIO_CALLNZ,         instr_handler},
1848     {BWRITERSIO_REP,            instr_handler},
1849     {BWRITERSIO_ENDREP,         instr_handler},
1850     {BWRITERSIO_IF,             instr_handler},
1851     {BWRITERSIO_LABEL,          instr_handler},
1852     {BWRITERSIO_IFC,            instr_handler},
1853     {BWRITERSIO_ELSE,           instr_handler},
1854     {BWRITERSIO_ENDIF,          instr_handler},
1855     {BWRITERSIO_BREAK,          instr_handler},
1856     {BWRITERSIO_BREAKC,         instr_handler},
1857     {BWRITERSIO_RET,            instr_handler},
1858
1859     {BWRITERSIO_TEX,            instr_handler},
1860     {BWRITERSIO_TEXLDP,         instr_handler},
1861     {BWRITERSIO_TEXLDB,         instr_handler},
1862     {BWRITERSIO_TEXKILL,        instr_handler},
1863     {BWRITERSIO_DSX,            instr_handler},
1864     {BWRITERSIO_DSY,            instr_handler},
1865
1866     {BWRITERSIO_SETP,           instr_handler},
1867     {BWRITERSIO_BREAKP,         instr_handler},
1868
1869     {BWRITERSIO_TEXLDD,         instr_handler},
1870
1871     {BWRITERSIO_END,            NULL},
1872 };
1873
1874 static const struct bytecode_backend ps_2_x_backend = {
1875     ps_2_header,
1876     end,
1877     ps_2_srcreg,
1878     ps_2_0_dstreg,
1879     sm_2_opcode,
1880     ps_2_x_handlers
1881 };
1882
1883 static void sm_3_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1884     /* Declare the shader type and version */
1885     put_dword(buffer, This->version);
1886
1887     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1888     write_declarations(This, buffer, TRUE, shader->outputs, shader->num_outputs, BWRITERSPR_OUTPUT);
1889     write_constF(shader, buffer, TRUE);
1890     write_constB(shader, buffer, TRUE);
1891     write_constI(shader, buffer, TRUE);
1892     write_samplers(shader, buffer);
1893     return;
1894 }
1895
1896 static void sm_3_srcreg(struct bc_writer *This,
1897                         const struct shader_reg *reg,
1898                         struct bytecode_buffer *buffer) {
1899     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1900     DWORD d3d9reg;
1901
1902     d3d9reg = d3d9_register(reg->type);
1903     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1904     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1905     token |= reg->regnum & D3DSP_REGNUM_MASK;
1906
1907     token |= d3d9_swizzle(reg->swizzle) & D3DVS_SWIZZLE_MASK;
1908     token |= d3d9_srcmod(reg->srcmod);
1909
1910     if(reg->rel_reg) {
1911         if(reg->type == BWRITERSPR_CONST && This->version == BWRITERPS_VERSION(3, 0)) {
1912             WARN("c%u[...] is unsupported in ps_3_0\n", reg->regnum);
1913             This->state = E_INVALIDARG;
1914             return;
1915         }
1916         if(((reg->rel_reg->type == BWRITERSPR_ADDR && This->version == BWRITERVS_VERSION(3, 0)) ||
1917            reg->rel_reg->type == BWRITERSPR_LOOP) &&
1918            reg->rel_reg->regnum == 0) {
1919             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1920         } else {
1921             WARN("Unsupported relative addressing register\n");
1922             This->state = E_INVALIDARG;
1923             return;
1924         }
1925     }
1926
1927     put_dword(buffer, token);
1928
1929     /* vs_2_0 and newer write the register containing the index explicitly in the
1930      * binary code
1931      */
1932     if(token & D3DVS_ADDRMODE_RELATIVE) {
1933         sm_3_srcreg(This, reg->rel_reg, buffer);
1934     }
1935 }
1936
1937 static void sm_3_dstreg(struct bc_writer *This,
1938                         const struct shader_reg *reg,
1939                         struct bytecode_buffer *buffer,
1940                         DWORD shift, DWORD mod) {
1941     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1942     DWORD d3d9reg;
1943
1944     if(reg->rel_reg) {
1945         if(This->version == BWRITERVS_VERSION(3, 0) &&
1946            reg->type == BWRITERSPR_OUTPUT) {
1947             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1948         } else {
1949             WARN("Relative addressing not supported for this shader type or register type\n");
1950             This->state = E_INVALIDARG;
1951             return;
1952         }
1953     }
1954
1955     d3d9reg = d3d9_register(reg->type);
1956     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1957     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1958     token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1959
1960     token |= d3d9_dstmod(mod);
1961
1962     token |= d3d9_writemask(reg->writemask);
1963     put_dword(buffer, token);
1964
1965     /* vs_2_0 and newer write the register containing the index explicitly in the
1966      * binary code
1967      */
1968     if(token & D3DVS_ADDRMODE_RELATIVE) {
1969         sm_3_srcreg(This, reg->rel_reg, buffer);
1970     }
1971 }
1972
1973 static const struct instr_handler_table vs_3_handlers[] = {
1974     {BWRITERSIO_ADD,            instr_handler},
1975     {BWRITERSIO_NOP,            instr_handler},
1976     {BWRITERSIO_MOV,            instr_handler},
1977     {BWRITERSIO_SUB,            instr_handler},
1978     {BWRITERSIO_MAD,            instr_handler},
1979     {BWRITERSIO_MUL,            instr_handler},
1980     {BWRITERSIO_RCP,            instr_handler},
1981     {BWRITERSIO_RSQ,            instr_handler},
1982     {BWRITERSIO_DP3,            instr_handler},
1983     {BWRITERSIO_DP4,            instr_handler},
1984     {BWRITERSIO_MIN,            instr_handler},
1985     {BWRITERSIO_MAX,            instr_handler},
1986     {BWRITERSIO_SLT,            instr_handler},
1987     {BWRITERSIO_SGE,            instr_handler},
1988     {BWRITERSIO_ABS,            instr_handler},
1989     {BWRITERSIO_EXP,            instr_handler},
1990     {BWRITERSIO_LOG,            instr_handler},
1991     {BWRITERSIO_EXPP,           instr_handler},
1992     {BWRITERSIO_LOGP,           instr_handler},
1993     {BWRITERSIO_DST,            instr_handler},
1994     {BWRITERSIO_LRP,            instr_handler},
1995     {BWRITERSIO_FRC,            instr_handler},
1996     {BWRITERSIO_CRS,            instr_handler},
1997     {BWRITERSIO_SGN,            instr_handler},
1998     {BWRITERSIO_NRM,            instr_handler},
1999     {BWRITERSIO_SINCOS,         instr_handler},
2000     {BWRITERSIO_M4x4,           instr_handler},
2001     {BWRITERSIO_M4x3,           instr_handler},
2002     {BWRITERSIO_M3x4,           instr_handler},
2003     {BWRITERSIO_M3x3,           instr_handler},
2004     {BWRITERSIO_M3x2,           instr_handler},
2005     {BWRITERSIO_LIT,            instr_handler},
2006     {BWRITERSIO_POW,            instr_handler},
2007     {BWRITERSIO_MOVA,           instr_handler},
2008
2009     {BWRITERSIO_CALL,           instr_handler},
2010     {BWRITERSIO_CALLNZ,         instr_handler},
2011     {BWRITERSIO_REP,            instr_handler},
2012     {BWRITERSIO_ENDREP,         instr_handler},
2013     {BWRITERSIO_IF,             instr_handler},
2014     {BWRITERSIO_LABEL,          instr_handler},
2015     {BWRITERSIO_IFC,            instr_handler},
2016     {BWRITERSIO_ELSE,           instr_handler},
2017     {BWRITERSIO_ENDIF,          instr_handler},
2018     {BWRITERSIO_BREAK,          instr_handler},
2019     {BWRITERSIO_BREAKC,         instr_handler},
2020     {BWRITERSIO_LOOP,           instr_handler},
2021     {BWRITERSIO_RET,            instr_handler},
2022     {BWRITERSIO_ENDLOOP,        instr_handler},
2023
2024     {BWRITERSIO_SETP,           instr_handler},
2025     {BWRITERSIO_BREAKP,         instr_handler},
2026     {BWRITERSIO_TEXLDL,         instr_handler},
2027
2028     {BWRITERSIO_END,            NULL},
2029 };
2030
2031 static const struct bytecode_backend vs_3_backend = {
2032     sm_3_header,
2033     end,
2034     sm_3_srcreg,
2035     sm_3_dstreg,
2036     sm_2_opcode,
2037     vs_3_handlers
2038 };
2039
2040 static const struct instr_handler_table ps_3_handlers[] = {
2041     {BWRITERSIO_ADD,            instr_handler},
2042     {BWRITERSIO_NOP,            instr_handler},
2043     {BWRITERSIO_MOV,            instr_handler},
2044     {BWRITERSIO_SUB,            instr_handler},
2045     {BWRITERSIO_MAD,            instr_handler},
2046     {BWRITERSIO_MUL,            instr_handler},
2047     {BWRITERSIO_RCP,            instr_handler},
2048     {BWRITERSIO_RSQ,            instr_handler},
2049     {BWRITERSIO_DP3,            instr_handler},
2050     {BWRITERSIO_DP4,            instr_handler},
2051     {BWRITERSIO_MIN,            instr_handler},
2052     {BWRITERSIO_MAX,            instr_handler},
2053     {BWRITERSIO_ABS,            instr_handler},
2054     {BWRITERSIO_EXP,            instr_handler},
2055     {BWRITERSIO_LOG,            instr_handler},
2056     {BWRITERSIO_EXPP,           instr_handler},
2057     {BWRITERSIO_LOGP,           instr_handler},
2058     {BWRITERSIO_LRP,            instr_handler},
2059     {BWRITERSIO_FRC,            instr_handler},
2060     {BWRITERSIO_CRS,            instr_handler},
2061     {BWRITERSIO_NRM,            instr_handler},
2062     {BWRITERSIO_SINCOS,         instr_handler},
2063     {BWRITERSIO_M4x4,           instr_handler},
2064     {BWRITERSIO_M4x3,           instr_handler},
2065     {BWRITERSIO_M3x4,           instr_handler},
2066     {BWRITERSIO_M3x3,           instr_handler},
2067     {BWRITERSIO_M3x2,           instr_handler},
2068     {BWRITERSIO_POW,            instr_handler},
2069     {BWRITERSIO_DP2ADD,         instr_handler},
2070     {BWRITERSIO_CMP,            instr_handler},
2071
2072     {BWRITERSIO_CALL,           instr_handler},
2073     {BWRITERSIO_CALLNZ,         instr_handler},
2074     {BWRITERSIO_REP,            instr_handler},
2075     {BWRITERSIO_ENDREP,         instr_handler},
2076     {BWRITERSIO_IF,             instr_handler},
2077     {BWRITERSIO_LABEL,          instr_handler},
2078     {BWRITERSIO_IFC,            instr_handler},
2079     {BWRITERSIO_ELSE,           instr_handler},
2080     {BWRITERSIO_ENDIF,          instr_handler},
2081     {BWRITERSIO_BREAK,          instr_handler},
2082     {BWRITERSIO_BREAKC,         instr_handler},
2083     {BWRITERSIO_LOOP,           instr_handler},
2084     {BWRITERSIO_RET,            instr_handler},
2085     {BWRITERSIO_ENDLOOP,        instr_handler},
2086
2087     {BWRITERSIO_SETP,           instr_handler},
2088     {BWRITERSIO_BREAKP,         instr_handler},
2089     {BWRITERSIO_TEXLDL,         instr_handler},
2090
2091     {BWRITERSIO_TEX,            instr_handler},
2092     {BWRITERSIO_TEXLDP,         instr_handler},
2093     {BWRITERSIO_TEXLDB,         instr_handler},
2094     {BWRITERSIO_TEXKILL,        instr_handler},
2095     {BWRITERSIO_DSX,            instr_handler},
2096     {BWRITERSIO_DSY,            instr_handler},
2097     {BWRITERSIO_TEXLDD,         instr_handler},
2098
2099     {BWRITERSIO_END,            NULL},
2100 };
2101
2102 static const struct bytecode_backend ps_3_backend = {
2103     sm_3_header,
2104     end,
2105     sm_3_srcreg,
2106     sm_3_dstreg,
2107     sm_2_opcode,
2108     ps_3_handlers
2109 };
2110
2111 static void init_vs10_dx9_writer(struct bc_writer *writer) {
2112     TRACE("Creating DirectX9 vertex shader 1.0 writer\n");
2113     writer->funcs = &vs_1_x_backend;
2114 }
2115
2116 static void init_vs11_dx9_writer(struct bc_writer *writer) {
2117     TRACE("Creating DirectX9 vertex shader 1.1 writer\n");
2118     writer->funcs = &vs_1_x_backend;
2119 }
2120
2121 static void init_vs20_dx9_writer(struct bc_writer *writer) {
2122     TRACE("Creating DirectX9 vertex shader 2.0 writer\n");
2123     writer->funcs = &vs_2_0_backend;
2124 }
2125
2126 static void init_vs2x_dx9_writer(struct bc_writer *writer) {
2127     TRACE("Creating DirectX9 vertex shader 2.x writer\n");
2128     writer->funcs = &vs_2_x_backend;
2129 }
2130
2131 static void init_vs30_dx9_writer(struct bc_writer *writer) {
2132     TRACE("Creating DirectX9 vertex shader 3.0 writer\n");
2133     writer->funcs = &vs_3_backend;
2134 }
2135
2136 static void init_ps10_dx9_writer(struct bc_writer *writer) {
2137     TRACE("Creating DirectX9 pixel shader 1.0 writer\n");
2138     writer->funcs = &ps_1_0123_backend;
2139 }
2140
2141 static void init_ps11_dx9_writer(struct bc_writer *writer) {
2142     TRACE("Creating DirectX9 pixel shader 1.1 writer\n");
2143     writer->funcs = &ps_1_0123_backend;
2144 }
2145
2146 static void init_ps12_dx9_writer(struct bc_writer *writer) {
2147     TRACE("Creating DirectX9 pixel shader 1.2 writer\n");
2148     writer->funcs = &ps_1_0123_backend;
2149 }
2150
2151 static void init_ps13_dx9_writer(struct bc_writer *writer) {
2152     TRACE("Creating DirectX9 pixel shader 1.3 writer\n");
2153     writer->funcs = &ps_1_0123_backend;
2154 }
2155
2156 static void init_ps14_dx9_writer(struct bc_writer *writer) {
2157     TRACE("Creating DirectX9 pixel shader 1.4 writer\n");
2158     writer->funcs = &ps_1_4_backend;
2159 }
2160
2161 static void init_ps20_dx9_writer(struct bc_writer *writer) {
2162     TRACE("Creating DirectX9 pixel shader 2.0 writer\n");
2163     writer->funcs = &ps_2_0_backend;
2164 }
2165
2166 static void init_ps2x_dx9_writer(struct bc_writer *writer) {
2167     TRACE("Creating DirectX9 pixel shader 2.x writer\n");
2168     writer->funcs = &ps_2_x_backend;
2169 }
2170
2171 static void init_ps30_dx9_writer(struct bc_writer *writer) {
2172     TRACE("Creating DirectX9 pixel shader 3.0 writer\n");
2173     writer->funcs = &ps_3_backend;
2174 }
2175
2176 static struct bc_writer *create_writer(DWORD version, DWORD dxversion) {
2177     struct bc_writer *ret = asm_alloc(sizeof(*ret));
2178
2179     if(!ret) {
2180         WARN("Failed to allocate a bytecode writer instance\n");
2181         return NULL;
2182     }
2183
2184     switch(version) {
2185         case BWRITERVS_VERSION(1, 0):
2186             if(dxversion != 9) {
2187                 WARN("Unsupported dxversion for vertex shader 1.0 requested: %u\n", dxversion);
2188                 goto fail;
2189             }
2190             init_vs10_dx9_writer(ret);
2191             break;
2192         case BWRITERVS_VERSION(1, 1):
2193             if(dxversion != 9) {
2194                 WARN("Unsupported dxversion for vertex shader 1.1 requested: %u\n", dxversion);
2195                 goto fail;
2196             }
2197             init_vs11_dx9_writer(ret);
2198             break;
2199         case BWRITERVS_VERSION(2, 0):
2200             if(dxversion != 9) {
2201                 WARN("Unsupported dxversion for vertex shader 2.0 requested: %u\n", dxversion);
2202                 goto fail;
2203             }
2204             init_vs20_dx9_writer(ret);
2205             break;
2206         case BWRITERVS_VERSION(2, 1):
2207             if(dxversion != 9) {
2208                 WARN("Unsupported dxversion for vertex shader 2.x requested: %u\n", dxversion);
2209                 goto fail;
2210             }
2211             init_vs2x_dx9_writer(ret);
2212             break;
2213         case BWRITERVS_VERSION(3, 0):
2214             if(dxversion != 9) {
2215                 WARN("Unsupported dxversion for vertex shader 3.0 requested: %u\n", dxversion);
2216                 goto fail;
2217             }
2218             init_vs30_dx9_writer(ret);
2219             break;
2220
2221         case BWRITERPS_VERSION(1, 0):
2222             if(dxversion != 9) {
2223                 WARN("Unsupported dxversion for pixel shader 1.0 requested: %u\n", dxversion);
2224                 goto fail;
2225             }
2226             init_ps10_dx9_writer(ret);
2227             break;
2228         case BWRITERPS_VERSION(1, 1):
2229             if(dxversion != 9) {
2230                 WARN("Unsupported dxversion for pixel shader 1.1 requested: %u\n", dxversion);
2231                 goto fail;
2232             }
2233             init_ps11_dx9_writer(ret);
2234             break;
2235         case BWRITERPS_VERSION(1, 2):
2236             if(dxversion != 9) {
2237                 WARN("Unsupported dxversion for pixel shader 1.2 requested: %u\n", dxversion);
2238                 goto fail;
2239             }
2240             init_ps12_dx9_writer(ret);
2241             break;
2242         case BWRITERPS_VERSION(1, 3):
2243             if(dxversion != 9) {
2244                 WARN("Unsupported dxversion for pixel shader 1.3 requested: %u\n", dxversion);
2245                 goto fail;
2246             }
2247             init_ps13_dx9_writer(ret);
2248             break;
2249         case BWRITERPS_VERSION(1, 4):
2250             if(dxversion != 9) {
2251                 WARN("Unsupported dxversion for pixel shader 1.4 requested: %u\n", dxversion);
2252                 goto fail;
2253             }
2254             init_ps14_dx9_writer(ret);
2255             break;
2256
2257         case BWRITERPS_VERSION(2, 0):
2258             if(dxversion != 9) {
2259                 WARN("Unsupported dxversion for pixel shader 2.0 requested: %u\n", dxversion);
2260                 goto fail;
2261             }
2262             init_ps20_dx9_writer(ret);
2263             break;
2264
2265         case BWRITERPS_VERSION(2, 1):
2266             if(dxversion != 9) {
2267                 WARN("Unsupported dxversion for pixel shader 2.x requested: %u\n", dxversion);
2268                 goto fail;
2269             }
2270             init_ps2x_dx9_writer(ret);
2271             break;
2272
2273         case BWRITERPS_VERSION(3, 0):
2274             if(dxversion != 9) {
2275                 WARN("Unsupported dxversion for pixel shader 3.0 requested: %u\n", dxversion);
2276                 goto fail;
2277             }
2278             init_ps30_dx9_writer(ret);
2279             break;
2280
2281         default:
2282             WARN("Unexpected shader version requested: %08x\n", version);
2283             goto fail;
2284     }
2285     ret->version = version;
2286     return ret;
2287
2288 fail:
2289     asm_free(ret);
2290     return NULL;
2291 }
2292
2293 static HRESULT call_instr_handler(struct bc_writer *writer,
2294                                   const struct instruction *instr,
2295                                   struct bytecode_buffer *buffer) {
2296     DWORD i=0;
2297
2298     while(writer->funcs->instructions[i].opcode != BWRITERSIO_END) {
2299         if(instr->opcode == writer->funcs->instructions[i].opcode) {
2300             if(!writer->funcs->instructions[i].func) {
2301                 WARN("Opcode %u not supported by this profile\n", instr->opcode);
2302                 return E_INVALIDARG;
2303             }
2304             writer->funcs->instructions[i].func(writer, instr, buffer);
2305             return S_OK;
2306         }
2307         i++;
2308     }
2309
2310     FIXME("Unhandled instruction %u - %s\n", instr->opcode,
2311           debug_print_opcode(instr->opcode));
2312     return E_INVALIDARG;
2313 }
2314
/* SlWriteBytecode (wineshader.@)
 *
 * Writes shader version specific bytecode from the shader passed in.
 * The returned bytecode can be passed to the Direct3D runtime like
 * IDirect3DDevice9::Create*Shader.
 *
 * Parameters:
 *  shader: Shader to translate into bytecode; the shader version to
 *          generate (d3d version token) is taken from shader->version
 *  dxversion: DirectX version the code targets
 *  result: the resulting shader bytecode
 *
 * Return values:
 *  S_OK on success
 */
2330 DWORD SlWriteBytecode(const struct bwriter_shader *shader, int dxversion, DWORD **result) {
2331     struct bc_writer *writer;
2332     struct bytecode_buffer *buffer = NULL;
2333     HRESULT hr;
2334     unsigned int i;
2335
2336     if(!shader){
2337         ERR("NULL shader structure, aborting\n");
2338         return E_FAIL;
2339     }
2340     writer = create_writer(shader->version, dxversion);
2341     *result = NULL;
2342
2343     if(!writer) {
2344         WARN("Could not create a bytecode writer instance. Either unsupported version\n");
2345         WARN("or out of memory\n");
2346         hr = E_FAIL;
2347         goto error;
2348     }
2349
2350     buffer = allocate_buffer();
2351     if(!buffer) {
2352         WARN("Failed to allocate a buffer for the shader bytecode\n");
2353         hr = E_FAIL;
2354         goto error;
2355     }
2356
2357     writer->funcs->header(writer, shader, buffer);
2358     if(FAILED(writer->state)) {
2359         hr = writer->state;
2360         goto error;
2361     }
2362
2363     for(i = 0; i < shader->num_instrs; i++) {
2364         hr = call_instr_handler(writer, shader->instr[i], buffer);
2365         if(FAILED(hr)) {
2366             goto error;
2367         }
2368     }
2369
2370     if(FAILED(writer->state)) {
2371         hr = writer->state;
2372         goto error;
2373     }
2374
2375     writer->funcs->end(writer, shader, buffer);
2376
2377     if(FAILED(buffer->state)) {
2378         hr = buffer->state;
2379         goto error;
2380     }
2381
2382     /* Cut off unneeded memory from the result buffer */
2383     *result = asm_realloc(buffer->data,
2384                          sizeof(DWORD) * buffer->size);
2385     if(!*result) {
2386         *result = buffer->data;
2387     }
2388     buffer->data = NULL;
2389     hr = S_OK;
2390
2391 error:
2392     if(buffer) {
2393         asm_free(buffer->data);
2394         asm_free(buffer);
2395     }
2396     asm_free(writer);
2397     return hr;
2398 }
2399
2400 void SlDeleteShader(struct bwriter_shader *shader) {
2401     unsigned int i, j;
2402
2403     TRACE("Deleting shader %p\n", shader);
2404
2405     for(i = 0; i < shader->num_cf; i++) {
2406         asm_free(shader->constF[i]);
2407     }
2408     asm_free(shader->constF);
2409     for(i = 0; i < shader->num_ci; i++) {
2410         asm_free(shader->constI[i]);
2411     }
2412     asm_free(shader->constI);
2413     for(i = 0; i < shader->num_cb; i++) {
2414         asm_free(shader->constB[i]);
2415     }
2416     asm_free(shader->constB);
2417
2418     asm_free(shader->inputs);
2419     asm_free(shader->outputs);
2420     asm_free(shader->samplers);
2421
2422     for(i = 0; i < shader->num_instrs; i++) {
2423         for(j = 0; j < shader->instr[i]->num_srcs; j++) {
2424             asm_free(shader->instr[i]->src[j].rel_reg);
2425         }
2426         asm_free(shader->instr[i]->src);
2427         asm_free(shader->instr[i]);
2428     }
2429     asm_free(shader->instr);
2430
2431     asm_free(shader);
2432 }