inetcpl: Added Hebrew translation.
[wine] / dlls / d3dcompiler_43 / bytecodewriter.c
1 /*
2  * Direct3D bytecode output functions
3  *
4  * Copyright 2008 Stefan Dösinger
5  * Copyright 2009 Matteo Bruni
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20  *
21  */
22
23 #include "config.h"
24 #include "wine/port.h"
25 #include "wine/debug.h"
26
27 #include "d3d9types.h"
28 #include "d3dcompiler_private.h"
29
30 WINE_DEFAULT_DEBUG_CHANNEL(asmshader);
31
32 /****************************************************************
33  * General assembler shader construction helper routines follow *
34  ****************************************************************/
35 /* struct instruction *alloc_instr
36  *
37  * Allocates a new instruction structure with srcs registers
38  *
39  * Parameters:
40  *  srcs: Number of source registers to allocate
41  *
42  * Returns:
43  *  A pointer to the allocated instruction structure
44  *  NULL in case of an allocation failure
45  */
46 struct instruction *alloc_instr(unsigned int srcs) {
47     struct instruction *ret = asm_alloc(sizeof(*ret));
48     if(!ret) {
49         ERR("Failed to allocate memory for an instruction structure\n");
50         return NULL;
51     }
52
53     if(srcs) {
54         ret->src = asm_alloc(srcs * sizeof(*ret->src));
55         if(!ret->src) {
56             ERR("Failed to allocate memory for instruction registers\n");
57             asm_free(ret);
58             return NULL;
59         }
60         ret->num_srcs = srcs;
61     }
62     return ret;
63 }
64
65 /* void add_instruction
66  *
67  * Adds a new instruction to the shader's instructions array and grows the instruction array
68  * if needed.
69  *
70  * The function does NOT copy the instruction structure. Make sure not to release the
71  * instruction or any of its substructures like registers.
72  *
73  * Parameters:
74  *  shader: Shader to add the instruction to
75  *  instr: Instruction to add to the shader
76  */
77 BOOL add_instruction(struct bwriter_shader *shader, struct instruction *instr) {
78     struct instruction      **new_instructions;
79
80     if(!shader) return FALSE;
81
82     if(shader->instr_alloc_size == 0) {
83         shader->instr = asm_alloc(sizeof(*shader->instr) * INSTRARRAY_INITIAL_SIZE);
84         if(!shader->instr) {
85             ERR("Failed to allocate the shader instruction array\n");
86             return FALSE;
87         }
88         shader->instr_alloc_size = INSTRARRAY_INITIAL_SIZE;
89     } else if(shader->instr_alloc_size == shader->num_instrs) {
90         new_instructions = asm_realloc(shader->instr,
91                                        sizeof(*shader->instr) * (shader->instr_alloc_size) * 2);
92         if(!new_instructions) {
93             ERR("Failed to grow the shader instruction array\n");
94             return FALSE;
95         }
96         shader->instr = new_instructions;
97         shader->instr_alloc_size = shader->instr_alloc_size * 2;
98     } else if(shader->num_instrs > shader->instr_alloc_size) {
99         ERR("More instructions than allocated. This should not happen\n");
100         return FALSE;
101     }
102
103     shader->instr[shader->num_instrs] = instr;
104     shader->num_instrs++;
105     return TRUE;
106 }
107
108 BOOL add_constF(struct bwriter_shader *shader, DWORD reg, float x, float y, float z, float w) {
109     struct constant *newconst;
110
111     if(shader->num_cf) {
112         struct constant **newarray;
113         newarray = asm_realloc(shader->constF,
114                                sizeof(*shader->constF) * (shader->num_cf + 1));
115         if(!newarray) {
116             ERR("Failed to grow the constants array\n");
117             return FALSE;
118         }
119         shader->constF = newarray;
120     } else {
121         shader->constF = asm_alloc(sizeof(*shader->constF));
122         if(!shader->constF) {
123             ERR("Failed to allocate the constants array\n");
124             return FALSE;
125         }
126     }
127
128     newconst = asm_alloc(sizeof(*newconst));
129     if(!newconst) {
130         ERR("Failed to allocate a new constant\n");
131         return FALSE;
132     }
133     newconst->regnum = reg;
134     newconst->value[0].f = x;
135     newconst->value[1].f = y;
136     newconst->value[2].f = z;
137     newconst->value[3].f = w;
138     shader->constF[shader->num_cf] = newconst;
139
140     shader->num_cf++;
141     return TRUE;
142 }
143
144 BOOL add_constI(struct bwriter_shader *shader, DWORD reg, INT x, INT y, INT z, INT w) {
145     struct constant *newconst;
146
147     if(shader->num_ci) {
148         struct constant **newarray;
149         newarray = asm_realloc(shader->constI,
150                                sizeof(*shader->constI) * (shader->num_ci + 1));
151         if(!newarray) {
152             ERR("Failed to grow the constants array\n");
153             return FALSE;
154         }
155         shader->constI = newarray;
156     } else {
157         shader->constI = asm_alloc(sizeof(*shader->constI));
158         if(!shader->constI) {
159             ERR("Failed to allocate the constants array\n");
160             return FALSE;
161         }
162     }
163
164     newconst = asm_alloc(sizeof(*newconst));
165     if(!newconst) {
166         ERR("Failed to allocate a new constant\n");
167         return FALSE;
168     }
169     newconst->regnum = reg;
170     newconst->value[0].i = x;
171     newconst->value[1].i = y;
172     newconst->value[2].i = z;
173     newconst->value[3].i = w;
174     shader->constI[shader->num_ci] = newconst;
175
176     shader->num_ci++;
177     return TRUE;
178 }
179
180 BOOL add_constB(struct bwriter_shader *shader, DWORD reg, BOOL x) {
181     struct constant *newconst;
182
183     if(shader->num_cb) {
184         struct constant **newarray;
185         newarray = asm_realloc(shader->constB,
186                                sizeof(*shader->constB) * (shader->num_cb + 1));
187         if(!newarray) {
188             ERR("Failed to grow the constants array\n");
189             return FALSE;
190         }
191         shader->constB = newarray;
192     } else {
193         shader->constB = asm_alloc(sizeof(*shader->constB));
194         if(!shader->constB) {
195             ERR("Failed to allocate the constants array\n");
196             return FALSE;
197         }
198     }
199
200     newconst = asm_alloc(sizeof(*newconst));
201     if(!newconst) {
202         ERR("Failed to allocate a new constant\n");
203         return FALSE;
204     }
205     newconst->regnum = reg;
206     newconst->value[0].b = x;
207     shader->constB[shader->num_cb] = newconst;
208
209     shader->num_cb++;
210     return TRUE;
211 }
212
213 BOOL record_declaration(struct bwriter_shader *shader, DWORD usage,
214                         DWORD usage_idx, DWORD mod, BOOL output,
215                         DWORD regnum, DWORD writemask, BOOL builtin) {
216     unsigned int *num;
217     struct declaration **decl;
218     unsigned int i;
219
220     if(!shader) return FALSE;
221
222     if(output) {
223         num = &shader->num_outputs;
224         decl = &shader->outputs;
225     } else {
226         num = &shader->num_inputs;
227         decl = &shader->inputs;
228     }
229
230     if(*num == 0) {
231         *decl = asm_alloc(sizeof(**decl));
232         if(!*decl) {
233             ERR("Error allocating declarations array\n");
234             return FALSE;
235         }
236     } else {
237         struct declaration *newdecl;
238         for(i = 0; i < *num; i++) {
239             if((*decl)[i].regnum == regnum && ((*decl)[i].writemask & writemask)) {
240                 WARN("Declaration of register %u already exists, writemask match 0x%x\n",
241                       regnum, (*decl)[i].writemask & writemask);
242             }
243         }
244
245         newdecl = asm_realloc(*decl,
246                               sizeof(**decl) * ((*num) + 1));
247         if(!newdecl) {
248             ERR("Error reallocating declarations array\n");
249             return FALSE;
250         }
251         *decl = newdecl;
252     }
253     (*decl)[*num].usage = usage;
254     (*decl)[*num].usage_idx = usage_idx;
255     (*decl)[*num].regnum = regnum;
256     (*decl)[*num].mod = mod;
257     (*decl)[*num].writemask = writemask;
258     (*decl)[*num].builtin = builtin;
259     (*num)++;
260
261     return TRUE;
262 }
263
264 BOOL record_sampler(struct bwriter_shader *shader, DWORD samptype, DWORD mod, DWORD regnum) {
265     unsigned int i;
266
267     if(!shader) return FALSE;
268
269     if(shader->num_samplers == 0) {
270         shader->samplers = asm_alloc(sizeof(*shader->samplers));
271         if(!shader->samplers) {
272             ERR("Error allocating samplers array\n");
273             return FALSE;
274         }
275     } else {
276         struct samplerdecl *newarray;
277
278         for(i = 0; i < shader->num_samplers; i++) {
279             if(shader->samplers[i].regnum == regnum) {
280                 WARN("Sampler %u already declared\n", regnum);
281                 /* This is not an error as far as the assembler is concerned.
282                  * Direct3D might refuse to load the compiled shader though
283                  */
284             }
285         }
286
287         newarray = asm_realloc(shader->samplers,
288                                sizeof(*shader->samplers) * (shader->num_samplers + 1));
289         if(!newarray) {
290             ERR("Error reallocating samplers array\n");
291             return FALSE;
292         }
293         shader->samplers = newarray;
294     }
295
296     shader->samplers[shader->num_samplers].type = samptype;
297     shader->samplers[shader->num_samplers].mod = mod;
298     shader->samplers[shader->num_samplers].regnum = regnum;
299     shader->num_samplers++;
300     return TRUE;
301 }
302
303
304 /* shader bytecode buffer manipulation functions.
305  * allocate_buffer creates a new buffer structure, put_dword adds a new
306  * DWORD to the buffer. In the rare case of a memory allocation failure
307  * when trying to grow the buffer a flag is set in the buffer to mark it
308  * invalid. This avoids return value checking and passing in many places
309  */
310 static struct bytecode_buffer *allocate_buffer(void) {
311     struct bytecode_buffer *ret;
312
313     ret = asm_alloc(sizeof(*ret));
314     if(!ret) return NULL;
315
316     ret->alloc_size = BYTECODEBUFFER_INITIAL_SIZE;
317     ret->data = asm_alloc(sizeof(DWORD) * ret->alloc_size);
318     if(!ret->data) {
319         asm_free(ret);
320         return NULL;
321     }
322     ret->state = S_OK;
323     return ret;
324 }
325
326 static void put_dword(struct bytecode_buffer *buffer, DWORD value) {
327     if(FAILED(buffer->state)) return;
328
329     if(buffer->alloc_size == buffer->size) {
330         DWORD *newarray;
331         buffer->alloc_size *= 2;
332         newarray = asm_realloc(buffer->data,
333                                sizeof(DWORD) * buffer->alloc_size);
334         if(!newarray) {
335             ERR("Failed to grow the buffer data memory\n");
336             buffer->state = E_OUTOFMEMORY;
337             return;
338         }
339         buffer->data = newarray;
340     }
341     buffer->data[buffer->size++] = value;
342 }
343
344 /******************************************************
345  * Implementation of the writer functions starts here *
346  ******************************************************/
347 static void write_declarations(struct bc_writer *This,
348                                struct bytecode_buffer *buffer, BOOL len,
349                                const struct declaration *decls, unsigned int num, DWORD type) {
350     DWORD i;
351     DWORD instr_dcl = D3DSIO_DCL;
352     DWORD token;
353     struct shader_reg reg;
354
355     ZeroMemory(&reg, sizeof(reg));
356
357     if(len) {
358         instr_dcl |= 2 << D3DSI_INSTLENGTH_SHIFT;
359     }
360
361     for(i = 0; i < num; i++) {
362         if(decls[i].builtin) continue;
363
364         /* Write the DCL instruction */
365         put_dword(buffer, instr_dcl);
366
367         /* Write the usage and index */
368         token = (1 << 31); /* Bit 31 of non-instruction opcodes is 1 */
369         token |= (decls[i].usage << D3DSP_DCL_USAGE_SHIFT) & D3DSP_DCL_USAGE_MASK;
370         token |= (decls[i].usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT) & D3DSP_DCL_USAGEINDEX_MASK;
371         put_dword(buffer, token);
372
373         /* Write the dest register */
374         reg.type = type;
375         reg.regnum = decls[i].regnum;
376         reg.u.writemask = decls[i].writemask;
377         This->funcs->dstreg(This, &reg, buffer, 0, decls[i].mod);
378     }
379 }
380
381 static void write_const(struct constant **consts, int num, DWORD opcode, DWORD reg_type, struct bytecode_buffer *buffer, BOOL len) {
382     DWORD i;
383     DWORD instr_def = opcode;
384     const DWORD reg = (1<<31) |
385                       ((reg_type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
386                       ((reg_type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
387                       D3DSP_WRITEMASK_ALL;
388
389     if(len) {
390         if(opcode == D3DSIO_DEFB)
391             instr_def |= 2 << D3DSI_INSTLENGTH_SHIFT;
392         else
393             instr_def |= 5 << D3DSI_INSTLENGTH_SHIFT;
394     }
395
396     for(i = 0; i < num; i++) {
397         /* Write the DEF instruction */
398         put_dword(buffer, instr_def);
399
400         put_dword(buffer, reg | (consts[i]->regnum & D3DSP_REGNUM_MASK));
401         put_dword(buffer, consts[i]->value[0].d);
402         if(opcode != D3DSIO_DEFB) {
403             put_dword(buffer, consts[i]->value[1].d);
404             put_dword(buffer, consts[i]->value[2].d);
405             put_dword(buffer, consts[i]->value[3].d);
406         }
407     }
408 }
409
410 static void write_constF(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
411     write_const(shader->constF, shader->num_cf, D3DSIO_DEF, D3DSPR_CONST, buffer, len);
412 }
413
414 /* This function looks for VS 1/2 registers mapping to VS 3 output registers */
415 static HRESULT vs_find_builtin_varyings(struct bc_writer *This, const struct bwriter_shader *shader) {
416     DWORD i;
417     DWORD usage, usage_idx, writemask, regnum;
418
419     for(i = 0; i < shader->num_outputs; i++) {
420         if(!shader->outputs[i].builtin) continue;
421
422         usage = shader->outputs[i].usage;
423         usage_idx = shader->outputs[i].usage_idx;
424         writemask = shader->outputs[i].writemask;
425         regnum = shader->outputs[i].regnum;
426
427         switch(usage) {
428             case BWRITERDECLUSAGE_POSITION:
429             case BWRITERDECLUSAGE_POSITIONT:
430                 if(usage_idx > 0) {
431                     WARN("dcl_position%u not supported in sm 1/2 shaders\n", usage_idx);
432                     return E_INVALIDARG;
433                 }
434                 TRACE("o%u is oPos\n", regnum);
435                 This->oPos_regnum = regnum;
436                 break;
437
438             case BWRITERDECLUSAGE_COLOR:
439                 if(usage_idx > 1) {
440                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
441                     return E_INVALIDARG;
442                 }
443                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
444                     WARN("Only WRITEMASK_ALL is supported on color in sm 1/2\n");
445                     return E_INVALIDARG;
446                 }
447                 TRACE("o%u is oD%u\n", regnum, usage_idx);
448                 This->oD_regnum[usage_idx] = regnum;
449                 break;
450
451             case BWRITERDECLUSAGE_TEXCOORD:
452                 if(usage_idx >= 8) {
453                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
454                     return E_INVALIDARG;
455                 }
456                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
457                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
458                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
459                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
460                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
461                     return E_INVALIDARG;
462                 }
463                 TRACE("o%u is oT%u\n", regnum, usage_idx);
464                 This->oT_regnum[usage_idx] = regnum;
465                 break;
466
467             case BWRITERDECLUSAGE_PSIZE:
468                 if(usage_idx > 0) {
469                     WARN("dcl_psize%u not supported in sm 1/2 shaders\n", usage_idx);
470                     return E_INVALIDARG;
471                 }
472                 TRACE("o%u writemask 0x%08x is oPts\n", regnum, writemask);
473                 This->oPts_regnum = regnum;
474                 This->oPts_mask = writemask;
475                 break;
476
477             case BWRITERDECLUSAGE_FOG:
478                 if(usage_idx > 0) {
479                     WARN("dcl_fog%u not supported in sm 1 shaders\n", usage_idx);
480                     return E_INVALIDARG;
481                 }
482                 if(writemask != BWRITERSP_WRITEMASK_0 && writemask != BWRITERSP_WRITEMASK_1 &&
483                    writemask != BWRITERSP_WRITEMASK_2 && writemask != BWRITERSP_WRITEMASK_3) {
484                     WARN("Unsupported fog writemask\n");
485                     return E_INVALIDARG;
486                 }
487                 TRACE("o%u writemask 0x%08x is oFog\n", regnum, writemask);
488                 This->oFog_regnum = regnum;
489                 This->oFog_mask = writemask;
490                 break;
491
492             default:
493                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
494                 return E_INVALIDARG;
495         }
496     }
497
498     return S_OK;
499 }
500
501 static void vs_1_x_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
502     HRESULT hr;
503
504     if(shader->num_ci || shader->num_cb) {
505         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
506         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
507         This->state = E_INVALIDARG;
508         return;
509     }
510
511     hr = vs_find_builtin_varyings(This, shader);
512     if(FAILED(hr)) {
513         This->state = hr;
514         return;
515     }
516
517     /* Declare the shader type and version */
518     put_dword(buffer, This->version);
519
520     write_declarations(This, buffer, FALSE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
521     write_constF(shader, buffer, FALSE);
522     return;
523 }
524
525 static HRESULT find_ps_builtin_semantics(struct bc_writer *This,
526                                          const struct bwriter_shader *shader,
527                                          DWORD texcoords) {
528     DWORD i;
529     DWORD usage, usage_idx, writemask, regnum;
530
531     This->v_regnum[0] = -1; This->v_regnum[1] = -1;
532     for(i = 0; i < 8; i++) This->t_regnum[i] = -1;
533
534     for(i = 0; i < shader->num_inputs; i++) {
535         if(!shader->inputs[i].builtin) continue;
536
537         usage = shader->inputs[i].usage;
538         usage_idx = shader->inputs[i].usage_idx;
539         writemask = shader->inputs[i].writemask;
540         regnum = shader->inputs[i].regnum;
541
542         switch(usage) {
543             case BWRITERDECLUSAGE_COLOR:
544                 if(usage_idx > 1) {
545                     WARN("dcl_color%u not supported in sm 1 shaders\n", usage_idx);
546                     return E_INVALIDARG;
547                 }
548                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
549                     WARN("Only WRITEMASK_ALL is supported on color in sm 1\n");
550                     return E_INVALIDARG;
551                 }
552                 TRACE("v%u is v%u\n", regnum, usage_idx);
553                 This->v_regnum[usage_idx] = regnum;
554                 break;
555
556             case BWRITERDECLUSAGE_TEXCOORD:
557                 if(usage_idx > texcoords) {
558                     WARN("dcl_texcoord%u not supported in this shader version\n", usage_idx);
559                     return E_INVALIDARG;
560                 }
561                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
562                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
563                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
564                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
565                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
566                 } else {
567                     writemask = BWRITERSP_WRITEMASK_ALL;
568                 }
569                 TRACE("v%u is t%u\n", regnum, usage_idx);
570                 This->t_regnum[usage_idx] = regnum;
571                 break;
572
573             default:
574                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
575                 return E_INVALIDARG;
576         }
577     }
578
579     return S_OK;
580 }
581
582 static void ps_1_x_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
583     HRESULT hr;
584
585     /* First check the constants and varyings, and complain if unsupported things are used */
586     if(shader->num_ci || shader->num_cb) {
587         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
588         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
589         This->state = E_INVALIDARG;
590         return;
591     }
592
593     hr = find_ps_builtin_semantics(This, shader, 4);
594     if(FAILED(hr)) {
595         This->state = hr;
596         return;
597     }
598
599     /* Declare the shader type and version */
600     put_dword(buffer, This->version);
601     write_constF(shader, buffer, TRUE);
602 }
603
604 static void ps_1_4_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
605     HRESULT hr;
606
607     /* First check the constants and varyings, and complain if unsupported things are used */
608     if(shader->num_ci || shader->num_cb) {
609         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
610         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
611         This->state = E_INVALIDARG;
612         return;
613     }
614     hr = find_ps_builtin_semantics(This, shader, 6);
615     if(FAILED(hr)) {
616         This->state = hr;
617         return;
618     }
619
620     /* Declare the shader type and version */
621     put_dword(buffer, This->version);
622     write_constF(shader, buffer, TRUE);
623 }
624
625 static void end(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
626     put_dword(buffer, D3DSIO_END);
627 }
628
629 static DWORD map_vs_output(struct bc_writer *This, DWORD regnum, DWORD mask, DWORD *has_components) {
630     DWORD token = 0;
631     DWORD i;
632
633     *has_components = TRUE;
634     if(regnum == This->oPos_regnum) {
635         token |= (D3DSPR_RASTOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
636         token |= D3DSRO_POSITION & D3DSP_REGNUM_MASK; /* No shift */
637         return token;
638     }
639     if(regnum == This->oFog_regnum && mask == This->oFog_mask) {
640         token |= (D3DSPR_RASTOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
641         token |= D3DSRO_FOG & D3DSP_REGNUM_MASK; /* No shift */
642         token |= D3DSP_WRITEMASK_ALL;
643         *has_components = FALSE;
644         return token;
645     }
646     if(regnum == This->oPts_regnum && mask == This->oPts_mask) {
647         token |= (D3DSPR_RASTOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
648         token |= D3DSRO_POINT_SIZE & D3DSP_REGNUM_MASK; /* No shift */
649         token |= D3DSP_WRITEMASK_ALL;
650         *has_components = FALSE;
651         return token;
652     }
653     for(i = 0; i < 2; i++) {
654         if(regnum == This->oD_regnum[i]) {
655             token |= (D3DSPR_ATTROUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
656             token |= i & D3DSP_REGNUM_MASK; /* No shift */
657             return token;
658         }
659     }
660     for(i = 0; i < 8; i++) {
661         if(regnum == This->oT_regnum[i]) {
662             token |= (D3DSPR_TEXCRDOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
663             token |= i & D3DSP_REGNUM_MASK; /* No shift */
664             return token;
665         }
666     }
667
668     /* The varying must be undeclared - if an unsupported varying was declared,
669      * the vs_find_builtin_varyings function would have caught it and this code
670      * would not run */
671     WARN("Undeclared varying %u\n", regnum);
672     This->state = E_INVALIDARG;
673     return -1;
674 }
675
676 static void vs_12_dstreg(struct bc_writer *This, const struct shader_reg *reg,
677                          struct bytecode_buffer *buffer,
678                          DWORD shift, DWORD mod) {
679     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
680     DWORD has_wmask;
681
682     if(reg->rel_reg) {
683         WARN("Relative addressing not supported for destination registers\n");
684         This->state = E_INVALIDARG;
685         return;
686     }
687
688     switch(reg->type) {
689         case BWRITERSPR_OUTPUT:
690             token |= map_vs_output(This, reg->regnum, reg->u.writemask, &has_wmask);
691             break;
692
693         case BWRITERSPR_RASTOUT:
694         case BWRITERSPR_ATTROUT:
695             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
696             * but are unexpected. If we hit this path it might be due to an error.
697             */
698             FIXME("Unexpected register type %u\n", reg->type);
699             /* drop through */
700         case BWRITERSPR_INPUT:
701         case BWRITERSPR_TEMP:
702         case BWRITERSPR_CONST:
703             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
704             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
705             has_wmask = TRUE;
706             break;
707
708         case BWRITERSPR_ADDR:
709             if(reg->regnum != 0) {
710                 WARN("Only a0 exists\n");
711                 This->state = E_INVALIDARG;
712                 return;
713             }
714             token |= (D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
715             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
716             has_wmask = TRUE;
717             break;
718
719         case BWRITERSPR_PREDICATE:
720             if(This->version != BWRITERVS_VERSION(2, 1)){
721                 WARN("Predicate register is allowed only in vs_2_x\n");
722                 This->state = E_INVALIDARG;
723                 return;
724             }
725             if(reg->regnum != 0) {
726                 WARN("Only predicate register p0 exists\n");
727                 This->state = E_INVALIDARG;
728                 return;
729             }
730             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
731             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
732             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
733             has_wmask = TRUE;
734             break;
735
736         default:
737             WARN("Invalid register type for 1.x-2.x vertex shader\n");
738             This->state = E_INVALIDARG;
739             return;
740     }
741
742     /* strictly speaking there are no modifiers in vs_2_0 and vs_1_x, but they can be written
743      * into the bytecode and since the compiler doesn't do such checks write them
744      * (the checks are done by the undocumented shader validator)
745      */
746     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
747     token |= d3d9_dstmod(mod);
748
749     if(has_wmask) {
750         token |= d3d9_writemask(reg->u.writemask);
751     }
752     put_dword(buffer, token);
753 }
754
755 static void vs_1_x_srcreg(struct bc_writer *This, const struct shader_reg *reg,
756                           struct bytecode_buffer *buffer) {
757     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
758     DWORD has_swizzle;
759     DWORD component;
760
761     switch(reg->type) {
762         case BWRITERSPR_OUTPUT:
763             /* Map the swizzle to a writemask, the format expected
764                by map_vs_output
765              */
766             switch(reg->u.swizzle) {
767                 case BWRITERVS_SWIZZLE_X:
768                     component = BWRITERSP_WRITEMASK_0;
769                     break;
770                 case BWRITERVS_SWIZZLE_Y:
771                     component = BWRITERSP_WRITEMASK_1;
772                     break;
773                 case BWRITERVS_SWIZZLE_Z:
774                     component = BWRITERSP_WRITEMASK_2;
775                     break;
776                 case BWRITERVS_SWIZZLE_W:
777                     component = BWRITERSP_WRITEMASK_3;
778                     break;
779                 default:
780                     component = 0;
781             }
782             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
783             break;
784
785         case BWRITERSPR_RASTOUT:
786         case BWRITERSPR_ATTROUT:
787             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
788              * but are unexpected. If we hit this path it might be due to an error.
789              */
790             FIXME("Unexpected register type %u\n", reg->type);
791             /* drop through */
792         case BWRITERSPR_INPUT:
793         case BWRITERSPR_TEMP:
794         case BWRITERSPR_CONST:
795         case BWRITERSPR_ADDR:
796             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
797             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
798             if(reg->rel_reg) {
799                 if(reg->rel_reg->type != BWRITERSPR_ADDR ||
800                    reg->rel_reg->regnum != 0 ||
801                    reg->rel_reg->u.swizzle != BWRITERVS_SWIZZLE_X) {
802                     WARN("Relative addressing in vs_1_x is only allowed with a0.x\n");
803                     This->state = E_INVALIDARG;
804                     return;
805                 }
806                 token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
807             }
808             break;
809
810         default:
811             WARN("Invalid register type for 1.x vshader\n");
812             This->state = E_INVALIDARG;
813             return;
814     }
815
816     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
817
818     token |= d3d9_srcmod(reg->srcmod);
819     put_dword(buffer, token);
820 }
821
822 static void write_srcregs(struct bc_writer *This, const struct instruction *instr,
823                           struct bytecode_buffer *buffer){
824     unsigned int i;
825     if(instr->has_predicate){
826         This->funcs->srcreg(This, &instr->predicate, buffer);
827     }
828     for(i = 0; i < instr->num_srcs; i++){
829         This->funcs->srcreg(This, &instr->src[i], buffer);
830     }
831 }
832
833 static DWORD map_ps13_temp(struct bc_writer *This, const struct shader_reg *reg) {
834     DWORD token = 0;
835     if(reg->regnum == T0_REG) {
836         token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
837         token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
838     } else if(reg->regnum == T1_REG) {
839         token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
840         token |= 1 & D3DSP_REGNUM_MASK; /* No shift */
841     } else if(reg->regnum == T2_REG) {
842         token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
843         token |= 2 & D3DSP_REGNUM_MASK; /* No shift */
844     } else if(reg->regnum == T3_REG) {
845         token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
846         token |= 3 & D3DSP_REGNUM_MASK; /* No shift */
847     } else {
848         token |= (D3DSPR_TEMP << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
849         token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
850     }
851     return token;
852 }
853
854 static DWORD map_ps_input(struct bc_writer *This,
855                           const struct shader_reg *reg) {
856     DWORD i, token = 0;
857     /* Map color interpolators */
858     for(i = 0; i < 2; i++) {
859         if(reg->regnum == This->v_regnum[i]) {
860             token |= (D3DSPR_INPUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
861             token |= i & D3DSP_REGNUM_MASK; /* No shift */
862             return token;
863         }
864     }
865     for(i = 0; i < 8; i++) {
866         if(reg->regnum == This->t_regnum[i]) {
867             token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
868             token |= i & D3DSP_REGNUM_MASK; /* No shift */
869             return token;
870         }
871     }
872
873     WARN("Invalid ps 1/2 varying\n");
874     This->state = E_INVALIDARG;
875     return token;
876 }
877
878 static void ps_1_0123_srcreg(struct bc_writer *This, const struct shader_reg *reg,
879                              struct bytecode_buffer *buffer) {
880     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
881     if(reg->rel_reg) {
882         WARN("Relative addressing not supported in <= ps_3_0\n");
883         This->state = E_INVALIDARG;
884         return;
885     }
886
887     switch(reg->type) {
888         case BWRITERSPR_INPUT:
889             token |= map_ps_input(This, reg);
890             break;
891
892             /* Take care about the texture temporaries. There's a problem: They aren't
893              * declared anywhere, so we can only hardcode the values that are used
894              * to map ps_1_3 shaders to the common shader structure
895              */
896         case BWRITERSPR_TEMP:
897             token |= map_ps13_temp(This, reg);
898             break;
899
900         case BWRITERSPR_CONST: /* Can be mapped 1:1 */
901             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
902             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
903             break;
904
905         default:
906             WARN("Invalid register type for <= ps_1_3 shader\n");
907             This->state = E_INVALIDARG;
908             return;
909     }
910
911     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
912
913     if(reg->srcmod == BWRITERSPSM_DZ || reg->srcmod == BWRITERSPSM_DW ||
914        reg->srcmod == BWRITERSPSM_ABS || reg->srcmod == BWRITERSPSM_ABSNEG ||
915        reg->srcmod == BWRITERSPSM_NOT) {
916         WARN("Invalid source modifier %u for <= ps_1_3\n", reg->srcmod);
917         This->state = E_INVALIDARG;
918         return;
919     }
920     token |= d3d9_srcmod(reg->srcmod);
921     put_dword(buffer, token);
922 }
923
924 static void ps_1_0123_dstreg(struct bc_writer *This, const struct shader_reg *reg,
925                              struct bytecode_buffer *buffer,
926                              DWORD shift, DWORD mod) {
927     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
928
929     if(reg->rel_reg) {
930         WARN("Relative addressing not supported for destination registers\n");
931         This->state = E_INVALIDARG;
932         return;
933     }
934
935     switch(reg->type) {
936         case BWRITERSPR_TEMP:
937             token |= map_ps13_temp(This, reg);
938             break;
939
940         /* texkill uses the input register as a destination parameter */
941         case BWRITERSPR_INPUT:
942             token |= map_ps_input(This, reg);
943             break;
944
945         default:
946             WARN("Invalid dest register type for 1.x pshader\n");
947             This->state = E_INVALIDARG;
948             return;
949     }
950
951     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
952     token |= d3d9_dstmod(mod);
953
954     token |= d3d9_writemask(reg->u.writemask);
955     put_dword(buffer, token);
956 }
957
958 /* The length of an instruction consists of the destination register (if any),
959  * the number of source registers, the number of address registers used for
960  * indirect addressing, and optionally the predicate register
961  */
962 static DWORD instrlen(const struct instruction *instr, unsigned int srcs, unsigned int dsts) {
963     unsigned int i;
964     DWORD ret = srcs + dsts + (instr->has_predicate ? 1 : 0);
965
966     if(dsts){
967         if(instr->dst.rel_reg) ret++;
968     }
969     for(i = 0; i < srcs; i++) {
970         if(instr->src[i].rel_reg) ret++;
971     }
972     return ret;
973 }
974
975 static void sm_1_x_opcode(struct bc_writer *This,
976                           const struct instruction *instr,
977                           DWORD token, struct bytecode_buffer *buffer) {
978     /* In sm_1_x instruction length isn't encoded */
979     if(instr->coissue){
980         token |= D3DSI_COISSUE;
981     }
982     put_dword(buffer, token);
983 }
984
985 static void instr_handler(struct bc_writer *This,
986                           const struct instruction *instr,
987                           struct bytecode_buffer *buffer) {
988     DWORD token = d3d9_opcode(instr->opcode);
989
990     This->funcs->opcode(This, instr, token, buffer);
991     if(instr->has_dst) This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
992     write_srcregs(This, instr, buffer);
993 }
994
995 static const struct instr_handler_table vs_1_x_handlers[] = {
996     {BWRITERSIO_ADD,            instr_handler},
997     {BWRITERSIO_NOP,            instr_handler},
998     {BWRITERSIO_MOV,            instr_handler},
999     {BWRITERSIO_SUB,            instr_handler},
1000     {BWRITERSIO_MAD,            instr_handler},
1001     {BWRITERSIO_MUL,            instr_handler},
1002     {BWRITERSIO_RCP,            instr_handler},
1003     {BWRITERSIO_RSQ,            instr_handler},
1004     {BWRITERSIO_DP3,            instr_handler},
1005     {BWRITERSIO_DP4,            instr_handler},
1006     {BWRITERSIO_MIN,            instr_handler},
1007     {BWRITERSIO_MAX,            instr_handler},
1008     {BWRITERSIO_SLT,            instr_handler},
1009     {BWRITERSIO_SGE,            instr_handler},
1010     {BWRITERSIO_EXP,            instr_handler},
1011     {BWRITERSIO_LOG,            instr_handler},
1012     {BWRITERSIO_EXPP,           instr_handler},
1013     {BWRITERSIO_LOGP,           instr_handler},
1014     {BWRITERSIO_DST,            instr_handler},
1015     {BWRITERSIO_FRC,            instr_handler},
1016     {BWRITERSIO_M4x4,           instr_handler},
1017     {BWRITERSIO_M4x3,           instr_handler},
1018     {BWRITERSIO_M3x4,           instr_handler},
1019     {BWRITERSIO_M3x3,           instr_handler},
1020     {BWRITERSIO_M3x2,           instr_handler},
1021     {BWRITERSIO_LIT,            instr_handler},
1022
1023     {BWRITERSIO_END,            NULL}, /* Sentinel value, it signals
1024                                           the end of the list */
1025 };
1026
1027 static const struct bytecode_backend vs_1_x_backend = {
1028     vs_1_x_header,
1029     end,
1030     vs_1_x_srcreg,
1031     vs_12_dstreg,
1032     sm_1_x_opcode,
1033     vs_1_x_handlers
1034 };
1035
1036 static void instr_ps_1_0123_texld(struct bc_writer *This,
1037                                   const struct instruction *instr,
1038                                   struct bytecode_buffer *buffer) {
1039     DWORD idx;
1040     struct shader_reg reg;
1041     DWORD swizzlemask;
1042
1043     if(instr->src[1].type != BWRITERSPR_SAMPLER ||
1044        instr->src[1].regnum > 3) {
1045         WARN("Unsupported sampler type %u regnum %u\n",
1046              instr->src[1].type, instr->src[1].regnum);
1047         This->state = E_INVALIDARG;
1048         return;
1049     } else if(instr->dst.type != BWRITERSPR_TEMP) {
1050         WARN("Can only sample into a temp register\n");
1051         This->state = E_INVALIDARG;
1052         return;
1053     }
1054
1055     idx = instr->src[1].regnum;
1056     if((idx == 0 && instr->dst.regnum != T0_REG) ||
1057        (idx == 1 && instr->dst.regnum != T1_REG) ||
1058        (idx == 2 && instr->dst.regnum != T2_REG) ||
1059        (idx == 3 && instr->dst.regnum != T3_REG)) {
1060         WARN("Sampling from sampler s%u to register r%u is not possible in ps_1_x\n",
1061              idx, instr->dst.regnum);
1062         This->state = E_INVALIDARG;
1063         return;
1064     }
1065     if(instr->src[0].type == BWRITERSPR_INPUT) {
1066         /* A simple non-dependent read tex instruction */
1067         if(instr->src[0].regnum != This->t_regnum[idx]) {
1068             WARN("Cannot sample from s%u with texture address data from interpolator %u\n",
1069                  idx, instr->src[0].regnum);
1070             This->state = E_INVALIDARG;
1071             return;
1072         }
1073         This->funcs->opcode(This, instr, D3DSIO_TEX & D3DSI_OPCODE_MASK, buffer);
1074
1075         /* map the temp dstreg to the ps_1_3 texture temporary register */
1076         This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1077     } else if(instr->src[0].type == BWRITERSPR_TEMP) {
1078
1079         swizzlemask = (3 << BWRITERVS_SWIZZLE_SHIFT) |
1080             (3 << (BWRITERVS_SWIZZLE_SHIFT + 2)) |
1081             (3 << (BWRITERVS_SWIZZLE_SHIFT + 4));
1082         if((instr->src[0].u.swizzle & swizzlemask) == (BWRITERVS_X_X | BWRITERVS_Y_Y | BWRITERVS_Z_Z)) {
1083             TRACE("writing texreg2rgb\n");
1084             This->funcs->opcode(This, instr, D3DSIO_TEXREG2RGB & D3DSI_OPCODE_MASK, buffer);
1085         } else if(instr->src[0].u.swizzle == (BWRITERVS_X_W | BWRITERVS_Y_X | BWRITERVS_Z_X | BWRITERVS_W_X)) {
1086             TRACE("writing texreg2ar\n");
1087             This->funcs->opcode(This, instr, D3DSIO_TEXREG2AR & D3DSI_OPCODE_MASK, buffer);
1088         } else if(instr->src[0].u.swizzle == (BWRITERVS_X_Y | BWRITERVS_Y_Z | BWRITERVS_Z_Z | BWRITERVS_W_Z)) {
1089             TRACE("writing texreg2gb\n");
1090             This->funcs->opcode(This, instr, D3DSIO_TEXREG2GB & D3DSI_OPCODE_MASK, buffer);
1091         } else {
1092             WARN("Unsupported src addr swizzle in dependent texld: 0x%08x\n", instr->src[0].u.swizzle);
1093             This->state = E_INVALIDARG;
1094             return;
1095         }
1096
1097         /* Dst and src reg can be mapped normally. Both registers are temporary registers in the
1098          * source shader and have to be mapped to the temporary form of the texture registers. However,
1099          * the src reg doesn't have a swizzle
1100          */
1101         This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1102         reg = instr->src[0];
1103         reg.u.swizzle = BWRITERVS_NOSWIZZLE;
1104         This->funcs->srcreg(This, &reg, buffer);
1105     } else {
1106         WARN("Invalid address data source register\n");
1107         This->state = E_INVALIDARG;
1108         return;
1109     }
1110 }
1111
1112 static void instr_ps_1_0123_mov(struct bc_writer *This,
1113                                 const struct instruction *instr,
1114                                 struct bytecode_buffer *buffer) {
1115     DWORD token = D3DSIO_MOV & D3DSI_OPCODE_MASK;
1116
1117     if(instr->dst.type == BWRITERSPR_TEMP && instr->src[0].type == BWRITERSPR_INPUT) {
1118         if((instr->dst.regnum == T0_REG && instr->src[0].regnum == This->t_regnum[0]) ||
1119            (instr->dst.regnum == T1_REG && instr->src[0].regnum == This->t_regnum[1]) ||
1120            (instr->dst.regnum == T2_REG && instr->src[0].regnum == This->t_regnum[2]) ||
1121            (instr->dst.regnum == T3_REG && instr->src[0].regnum == This->t_regnum[3])) {
1122             if(instr->dstmod & BWRITERSPDM_SATURATE) {
1123                 This->funcs->opcode(This, instr, D3DSIO_TEXCOORD & D3DSI_OPCODE_MASK, buffer);
1124                 /* Remove the SATURATE flag, it's implicit to the instruction */
1125                 This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod & (~BWRITERSPDM_SATURATE));
1126                 return;
1127             } else {
1128                 WARN("A varying -> temp copy is only supported with the SATURATE modifier in <=ps_1_3\n");
1129                 This->state = E_INVALIDARG;
1130                 return;
1131             }
1132         } else if(instr->src[0].regnum == This->v_regnum[0] ||
1133                   instr->src[0].regnum == This->v_regnum[1]) {
1134             /* Handled by the normal mov below. Just drop out of the if condition */
1135         } else {
1136             WARN("Unsupported varying -> temp mov in <= ps_1_3\n");
1137             This->state = E_INVALIDARG;
1138             return;
1139         }
1140     }
1141
1142     This->funcs->opcode(This, instr, token, buffer);
1143     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1144     This->funcs->srcreg(This, &instr->src[0], buffer);
1145 }
1146
1147 static const struct instr_handler_table ps_1_0123_handlers[] = {
1148     {BWRITERSIO_ADD,            instr_handler},
1149     {BWRITERSIO_NOP,            instr_handler},
1150     {BWRITERSIO_MOV,            instr_ps_1_0123_mov},
1151     {BWRITERSIO_SUB,            instr_handler},
1152     {BWRITERSIO_MAD,            instr_handler},
1153     {BWRITERSIO_MUL,            instr_handler},
1154     {BWRITERSIO_DP3,            instr_handler},
1155     {BWRITERSIO_DP4,            instr_handler},
1156     {BWRITERSIO_LRP,            instr_handler},
1157
1158     /* pshader instructions */
1159     {BWRITERSIO_CND,            instr_handler},
1160     {BWRITERSIO_CMP,            instr_handler},
1161     {BWRITERSIO_TEXKILL,        instr_handler},
1162     {BWRITERSIO_TEX,            instr_ps_1_0123_texld},
1163     {BWRITERSIO_TEXBEM,         instr_handler},
1164     {BWRITERSIO_TEXBEML,        instr_handler},
1165     {BWRITERSIO_TEXM3x2PAD,     instr_handler},
1166     {BWRITERSIO_TEXM3x3PAD,     instr_handler},
1167     {BWRITERSIO_TEXM3x3SPEC,    instr_handler},
1168     {BWRITERSIO_TEXM3x3VSPEC,   instr_handler},
1169     {BWRITERSIO_TEXM3x3TEX,     instr_handler},
1170     {BWRITERSIO_TEXM3x3,        instr_handler},
1171     {BWRITERSIO_TEXM3x2DEPTH,   instr_handler},
1172     {BWRITERSIO_TEXM3x2TEX,     instr_handler},
1173     {BWRITERSIO_TEXDP3,         instr_handler},
1174     {BWRITERSIO_TEXDP3TEX,      instr_handler},
1175     {BWRITERSIO_END,            NULL},
1176 };
1177
1178 static const struct bytecode_backend ps_1_0123_backend = {
1179     ps_1_x_header,
1180     end,
1181     ps_1_0123_srcreg,
1182     ps_1_0123_dstreg,
1183     sm_1_x_opcode,
1184     ps_1_0123_handlers
1185 };
1186
1187 static void ps_1_4_srcreg(struct bc_writer *This, const struct shader_reg *reg,
1188                           struct bytecode_buffer *buffer) {
1189     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1190     if(reg->rel_reg) {
1191         WARN("Relative addressing not supported in <= ps_3_0\n");
1192         This->state = E_INVALIDARG;
1193         return;
1194     }
1195
1196     switch(reg->type) {
1197         case BWRITERSPR_INPUT:
1198             token |= map_ps_input(This, reg);
1199             break;
1200
1201         /* Can be mapped 1:1 */
1202         case BWRITERSPR_TEMP:
1203         case BWRITERSPR_CONST:
1204             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1205             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1206             break;
1207
1208         default:
1209             WARN("Invalid register type for ps_1_4 shader\n");
1210             This->state = E_INVALIDARG;
1211             return;
1212     }
1213
1214     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1215
1216     if(reg->srcmod == BWRITERSPSM_ABS || reg->srcmod == BWRITERSPSM_ABSNEG ||
1217        reg->srcmod == BWRITERSPSM_NOT) {
1218         WARN("Invalid source modifier %u for ps_1_4\n", reg->srcmod);
1219         This->state = E_INVALIDARG;
1220         return;
1221     }
1222     token |= d3d9_srcmod(reg->srcmod);
1223     put_dword(buffer, token);
1224 }
1225
1226 static void ps_1_4_dstreg(struct bc_writer *This, const struct shader_reg *reg,
1227                           struct bytecode_buffer *buffer,
1228                           DWORD shift, DWORD mod) {
1229     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1230
1231     if(reg->rel_reg) {
1232         WARN("Relative addressing not supported for destination registers\n");
1233         This->state = E_INVALIDARG;
1234         return;
1235     }
1236
1237     switch(reg->type) {
1238         case BWRITERSPR_TEMP: /* 1:1 mapping */
1239             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1240             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1241             break;
1242
1243         /* For texkill */
1244         case BWRITERSPR_INPUT:
1245             token |= map_ps_input(This, reg);
1246             break;
1247
1248         default:
1249             WARN("Invalid dest register type for 1.x pshader\n");
1250             This->state = E_INVALIDARG;
1251             return;
1252     }
1253
1254     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1255     token |= d3d9_dstmod(mod);
1256
1257     token |= d3d9_writemask(reg->u.writemask);
1258     put_dword(buffer, token);
1259 }
1260
1261 static void instr_ps_1_4_mov(struct bc_writer *This,
1262                              const struct instruction *instr,
1263                              struct bytecode_buffer *buffer) {
1264     DWORD token = D3DSIO_MOV & D3DSI_OPCODE_MASK;
1265
1266     if(instr->dst.type == BWRITERSPR_TEMP && instr->src[0].type == BWRITERSPR_INPUT) {
1267         if(instr->src[0].regnum == This->t_regnum[0] ||
1268            instr->src[0].regnum == This->t_regnum[1] ||
1269            instr->src[0].regnum == This->t_regnum[2] ||
1270            instr->src[0].regnum == This->t_regnum[3] ||
1271            instr->src[0].regnum == This->t_regnum[4] ||
1272            instr->src[0].regnum == This->t_regnum[5]) {
1273             /* Similar to a regular mov, but a different opcode */
1274             token = D3DSIO_TEXCOORD & D3DSI_OPCODE_MASK;
1275         } else if(instr->src[0].regnum == This->v_regnum[0] ||
1276                   instr->src[0].regnum == This->v_regnum[1]) {
1277             /* Handled by the normal mov below. Just drop out of the if condition */
1278         } else {
1279             WARN("Unsupported varying -> temp mov in ps_1_4\n");
1280             This->state = E_INVALIDARG;
1281             return;
1282         }
1283     }
1284
1285     This->funcs->opcode(This, instr, token, buffer);
1286     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1287     This->funcs->srcreg(This, &instr->src[0], buffer);
1288 }
1289
1290 static void instr_ps_1_4_texld(struct bc_writer *This,
1291                                const struct instruction *instr,
1292                                struct bytecode_buffer *buffer) {
1293     if(instr->src[1].type != BWRITERSPR_SAMPLER ||
1294        instr->src[1].regnum > 5) {
1295         WARN("Unsupported sampler type %u regnum %u\n",
1296              instr->src[1].type, instr->src[1].regnum);
1297         This->state = E_INVALIDARG;
1298         return;
1299     } else if(instr->dst.type != BWRITERSPR_TEMP) {
1300         WARN("Can only sample into a temp register\n");
1301         This->state = E_INVALIDARG;
1302         return;
1303     }
1304
1305     if(instr->src[1].regnum != instr->dst.regnum) {
1306         WARN("Sampling from sampler s%u to register r%u is not possible in ps_1_4\n",
1307              instr->src[1].regnum, instr->dst.regnum);
1308         This->state = E_INVALIDARG;
1309         return;
1310     }
1311
1312     This->funcs->opcode(This, instr, D3DSIO_TEX & D3DSI_OPCODE_MASK, buffer);
1313     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1314     This->funcs->srcreg(This, &instr->src[0], buffer);
1315 }
1316
1317 static const struct instr_handler_table ps_1_4_handlers[] = {
1318     {BWRITERSIO_ADD,            instr_handler},
1319     {BWRITERSIO_NOP,            instr_handler},
1320     {BWRITERSIO_MOV,            instr_ps_1_4_mov},
1321     {BWRITERSIO_SUB,            instr_handler},
1322     {BWRITERSIO_MAD,            instr_handler},
1323     {BWRITERSIO_MUL,            instr_handler},
1324     {BWRITERSIO_DP3,            instr_handler},
1325     {BWRITERSIO_DP4,            instr_handler},
1326     {BWRITERSIO_LRP,            instr_handler},
1327
1328     /* pshader instructions */
1329     {BWRITERSIO_CND,            instr_handler},
1330     {BWRITERSIO_CMP,            instr_handler},
1331     {BWRITERSIO_TEXKILL,        instr_handler},
1332     {BWRITERSIO_TEX,            instr_ps_1_4_texld},
1333     {BWRITERSIO_TEXDEPTH,       instr_handler},
1334     {BWRITERSIO_BEM,            instr_handler},
1335
1336     {BWRITERSIO_PHASE,          instr_handler},
1337     {BWRITERSIO_END,            NULL},
1338 };
1339
1340 static const struct bytecode_backend ps_1_4_backend = {
1341     ps_1_4_header,
1342     end,
1343     ps_1_4_srcreg,
1344     ps_1_4_dstreg,
1345     sm_1_x_opcode,
1346     ps_1_4_handlers
1347 };
1348
1349 static void write_constB(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
1350     write_const(shader->constB, shader->num_cb, D3DSIO_DEFB, D3DSPR_CONSTBOOL, buffer, len);
1351 }
1352
1353 static void write_constI(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
1354     write_const(shader->constI, shader->num_ci, D3DSIO_DEFI, D3DSPR_CONSTINT, buffer, len);
1355 }
1356
1357 static void vs_2_header(struct bc_writer *This,
1358                         const struct bwriter_shader *shader,
1359                         struct bytecode_buffer *buffer) {
1360     HRESULT hr;
1361
1362     hr = vs_find_builtin_varyings(This, shader);
1363     if(FAILED(hr)) {
1364         This->state = hr;
1365         return;
1366     }
1367
1368     /* Declare the shader type and version */
1369     put_dword(buffer, This->version);
1370
1371     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1372     write_constF(shader, buffer, TRUE);
1373     write_constB(shader, buffer, TRUE);
1374     write_constI(shader, buffer, TRUE);
1375     return;
1376 }
1377
1378 static void vs_2_srcreg(struct bc_writer *This,
1379                         const struct shader_reg *reg,
1380                         struct bytecode_buffer *buffer) {
1381     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1382     DWORD has_swizzle;
1383     DWORD component;
1384     DWORD d3d9reg;
1385
1386     switch(reg->type) {
1387         case BWRITERSPR_OUTPUT:
1388             /* Map the swizzle to a writemask, the format expected
1389                by map_vs_output
1390              */
1391             switch(reg->u.swizzle) {
1392                 case BWRITERVS_SWIZZLE_X:
1393                     component = BWRITERSP_WRITEMASK_0;
1394                     break;
1395                 case BWRITERVS_SWIZZLE_Y:
1396                     component = BWRITERSP_WRITEMASK_1;
1397                     break;
1398                 case BWRITERVS_SWIZZLE_Z:
1399                     component = BWRITERSP_WRITEMASK_2;
1400                     break;
1401                 case BWRITERVS_SWIZZLE_W:
1402                     component = BWRITERSP_WRITEMASK_3;
1403                     break;
1404                 default:
1405                     component = 0;
1406             }
1407             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
1408             break;
1409
1410         case BWRITERSPR_RASTOUT:
1411         case BWRITERSPR_ATTROUT:
1412             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
1413              * but are unexpected. If we hit this path it might be due to an error.
1414              */
1415             FIXME("Unexpected register type %u\n", reg->type);
1416             /* drop through */
1417         case BWRITERSPR_INPUT:
1418         case BWRITERSPR_TEMP:
1419         case BWRITERSPR_CONST:
1420         case BWRITERSPR_ADDR:
1421         case BWRITERSPR_CONSTINT:
1422         case BWRITERSPR_CONSTBOOL:
1423         case BWRITERSPR_LABEL:
1424             d3d9reg = d3d9_register(reg->type);
1425             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1426             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1427             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1428             break;
1429
1430         case BWRITERSPR_LOOP:
1431             if(reg->regnum != 0) {
1432                 WARN("Only regnum 0 is supported for the loop index register in vs_2_0\n");
1433                 This->state = E_INVALIDARG;
1434                 return;
1435             }
1436             token |= (D3DSPR_LOOP << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1437             token |= (D3DSPR_LOOP << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1438             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
1439             break;
1440
1441         case BWRITERSPR_PREDICATE:
1442             if(This->version != BWRITERVS_VERSION(2, 1)){
1443                 WARN("Predicate register is allowed only in vs_2_x\n");
1444                 This->state = E_INVALIDARG;
1445                 return;
1446             }
1447             if(reg->regnum > 0) {
1448                 WARN("Only predicate register 0 is supported\n");
1449                 This->state = E_INVALIDARG;
1450                 return;
1451             }
1452             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1453             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1454             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
1455
1456             break;
1457
1458         default:
1459             WARN("Invalid register type for 2.0 vshader\n");
1460             This->state = E_INVALIDARG;
1461             return;
1462     }
1463
1464     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1465
1466     token |= d3d9_srcmod(reg->srcmod);
1467
1468     if(reg->rel_reg)
1469         token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1470
1471     put_dword(buffer, token);
1472
1473     /* vs_2_0 and newer write the register containing the index explicitly in the
1474      * binary code
1475      */
1476     if(token & D3DVS_ADDRMODE_RELATIVE)
1477         vs_2_srcreg(This, reg->rel_reg, buffer);
1478 }
1479
1480 static void sm_2_opcode(struct bc_writer *This,
1481                         const struct instruction *instr,
1482                         DWORD token, struct bytecode_buffer *buffer) {
1483     /* From sm 2 onwards instruction length is encoded in the opcode field */
1484     int dsts = instr->has_dst ? 1 : 0;
1485     token |= instrlen(instr, instr->num_srcs, dsts) << D3DSI_INSTLENGTH_SHIFT;
1486     if(instr->comptype)
1487         token |= (d3d9_comparetype(instr->comptype) << 16) & (0xf << 16);
1488     if(instr->has_predicate)
1489         token |= D3DSHADER_INSTRUCTION_PREDICATED;
1490     put_dword(buffer,token);
1491 }
1492
1493 static const struct instr_handler_table vs_2_0_handlers[] = {
1494     {BWRITERSIO_ADD,            instr_handler},
1495     {BWRITERSIO_NOP,            instr_handler},
1496     {BWRITERSIO_MOV,            instr_handler},
1497     {BWRITERSIO_SUB,            instr_handler},
1498     {BWRITERSIO_MAD,            instr_handler},
1499     {BWRITERSIO_MUL,            instr_handler},
1500     {BWRITERSIO_RCP,            instr_handler},
1501     {BWRITERSIO_RSQ,            instr_handler},
1502     {BWRITERSIO_DP3,            instr_handler},
1503     {BWRITERSIO_DP4,            instr_handler},
1504     {BWRITERSIO_MIN,            instr_handler},
1505     {BWRITERSIO_MAX,            instr_handler},
1506     {BWRITERSIO_SLT,            instr_handler},
1507     {BWRITERSIO_SGE,            instr_handler},
1508     {BWRITERSIO_ABS,            instr_handler},
1509     {BWRITERSIO_EXP,            instr_handler},
1510     {BWRITERSIO_LOG,            instr_handler},
1511     {BWRITERSIO_EXPP,           instr_handler},
1512     {BWRITERSIO_LOGP,           instr_handler},
1513     {BWRITERSIO_DST,            instr_handler},
1514     {BWRITERSIO_LRP,            instr_handler},
1515     {BWRITERSIO_FRC,            instr_handler},
1516     {BWRITERSIO_CRS,            instr_handler},
1517     {BWRITERSIO_SGN,            instr_handler},
1518     {BWRITERSIO_NRM,            instr_handler},
1519     {BWRITERSIO_SINCOS,         instr_handler},
1520     {BWRITERSIO_M4x4,           instr_handler},
1521     {BWRITERSIO_M4x3,           instr_handler},
1522     {BWRITERSIO_M3x4,           instr_handler},
1523     {BWRITERSIO_M3x3,           instr_handler},
1524     {BWRITERSIO_M3x2,           instr_handler},
1525     {BWRITERSIO_LIT,            instr_handler},
1526     {BWRITERSIO_POW,            instr_handler},
1527     {BWRITERSIO_MOVA,           instr_handler},
1528
1529     {BWRITERSIO_CALL,           instr_handler},
1530     {BWRITERSIO_CALLNZ,         instr_handler},
1531     {BWRITERSIO_REP,            instr_handler},
1532     {BWRITERSIO_ENDREP,         instr_handler},
1533     {BWRITERSIO_IF,             instr_handler},
1534     {BWRITERSIO_LABEL,          instr_handler},
1535     {BWRITERSIO_ELSE,           instr_handler},
1536     {BWRITERSIO_ENDIF,          instr_handler},
1537     {BWRITERSIO_LOOP,           instr_handler},
1538     {BWRITERSIO_RET,            instr_handler},
1539     {BWRITERSIO_ENDLOOP,        instr_handler},
1540
1541     {BWRITERSIO_END,            NULL},
1542 };
1543
1544 static const struct bytecode_backend vs_2_0_backend = {
1545     vs_2_header,
1546     end,
1547     vs_2_srcreg,
1548     vs_12_dstreg,
1549     sm_2_opcode,
1550     vs_2_0_handlers
1551 };
1552
1553 static const struct instr_handler_table vs_2_x_handlers[] = {
1554     {BWRITERSIO_ADD,            instr_handler},
1555     {BWRITERSIO_NOP,            instr_handler},
1556     {BWRITERSIO_MOV,            instr_handler},
1557     {BWRITERSIO_SUB,            instr_handler},
1558     {BWRITERSIO_MAD,            instr_handler},
1559     {BWRITERSIO_MUL,            instr_handler},
1560     {BWRITERSIO_RCP,            instr_handler},
1561     {BWRITERSIO_RSQ,            instr_handler},
1562     {BWRITERSIO_DP3,            instr_handler},
1563     {BWRITERSIO_DP4,            instr_handler},
1564     {BWRITERSIO_MIN,            instr_handler},
1565     {BWRITERSIO_MAX,            instr_handler},
1566     {BWRITERSIO_SLT,            instr_handler},
1567     {BWRITERSIO_SGE,            instr_handler},
1568     {BWRITERSIO_ABS,            instr_handler},
1569     {BWRITERSIO_EXP,            instr_handler},
1570     {BWRITERSIO_LOG,            instr_handler},
1571     {BWRITERSIO_EXPP,           instr_handler},
1572     {BWRITERSIO_LOGP,           instr_handler},
1573     {BWRITERSIO_DST,            instr_handler},
1574     {BWRITERSIO_LRP,            instr_handler},
1575     {BWRITERSIO_FRC,            instr_handler},
1576     {BWRITERSIO_CRS,            instr_handler},
1577     {BWRITERSIO_SGN,            instr_handler},
1578     {BWRITERSIO_NRM,            instr_handler},
1579     {BWRITERSIO_SINCOS,         instr_handler},
1580     {BWRITERSIO_M4x4,           instr_handler},
1581     {BWRITERSIO_M4x3,           instr_handler},
1582     {BWRITERSIO_M3x4,           instr_handler},
1583     {BWRITERSIO_M3x3,           instr_handler},
1584     {BWRITERSIO_M3x2,           instr_handler},
1585     {BWRITERSIO_LIT,            instr_handler},
1586     {BWRITERSIO_POW,            instr_handler},
1587     {BWRITERSIO_MOVA,           instr_handler},
1588
1589     {BWRITERSIO_CALL,           instr_handler},
1590     {BWRITERSIO_CALLNZ,         instr_handler},
1591     {BWRITERSIO_REP,            instr_handler},
1592     {BWRITERSIO_ENDREP,         instr_handler},
1593     {BWRITERSIO_IF,             instr_handler},
1594     {BWRITERSIO_LABEL,          instr_handler},
1595     {BWRITERSIO_IFC,            instr_handler},
1596     {BWRITERSIO_ELSE,           instr_handler},
1597     {BWRITERSIO_ENDIF,          instr_handler},
1598     {BWRITERSIO_BREAK,          instr_handler},
1599     {BWRITERSIO_BREAKC,         instr_handler},
1600     {BWRITERSIO_LOOP,           instr_handler},
1601     {BWRITERSIO_RET,            instr_handler},
1602     {BWRITERSIO_ENDLOOP,        instr_handler},
1603
1604     {BWRITERSIO_SETP,           instr_handler},
1605     {BWRITERSIO_BREAKP,         instr_handler},
1606
1607     {BWRITERSIO_END,            NULL},
1608 };
1609
1610 static const struct bytecode_backend vs_2_x_backend = {
1611     vs_2_header,
1612     end,
1613     vs_2_srcreg,
1614     vs_12_dstreg,
1615     sm_2_opcode,
1616     vs_2_x_handlers
1617 };
1618
1619 static void write_samplers(const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1620     DWORD i;
1621     DWORD instr_dcl = D3DSIO_DCL | (2 << D3DSI_INSTLENGTH_SHIFT);
1622     DWORD token;
1623     const DWORD reg = (1<<31) |
1624         ((D3DSPR_SAMPLER << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
1625         ((D3DSPR_SAMPLER << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
1626         D3DSP_WRITEMASK_ALL;
1627
1628     for(i = 0; i < shader->num_samplers; i++) {
1629         /* Write the DCL instruction */
1630         put_dword(buffer, instr_dcl);
1631         token = (1<<31);
1632         /* Already shifted */
1633         token |= (d3d9_sampler(shader->samplers[i].type)) & D3DSP_TEXTURETYPE_MASK;
1634         put_dword(buffer, token);
1635         token = reg | (shader->samplers[i].regnum & D3DSP_REGNUM_MASK);
1636         token |= d3d9_dstmod(shader->samplers[i].mod);
1637         put_dword(buffer, token);
1638     }
1639 }
1640
1641 static void ps_2_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1642     HRESULT hr = find_ps_builtin_semantics(This, shader, 8);
1643     if(FAILED(hr)) {
1644         This->state = hr;
1645         return;
1646     }
1647
1648     /* Declare the shader type and version */
1649     put_dword(buffer, This->version);
1650     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1651     write_samplers(shader, buffer);
1652     write_constF(shader, buffer, TRUE);
1653     write_constB(shader, buffer, TRUE);
1654     write_constI(shader, buffer, TRUE);
1655 }
1656
1657 static void ps_2_srcreg(struct bc_writer *This,
1658                         const struct shader_reg *reg,
1659                         struct bytecode_buffer *buffer) {
1660     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1661     DWORD d3d9reg;
1662     if(reg->rel_reg) {
1663         WARN("Relative addressing not supported in <= ps_3_0\n");
1664         This->state = E_INVALIDARG;
1665         return;
1666     }
1667
1668     switch(reg->type) {
1669         case BWRITERSPR_INPUT:
1670             token |= map_ps_input(This, reg);
1671             break;
1672
1673             /* Can be mapped 1:1 */
1674         case BWRITERSPR_TEMP:
1675         case BWRITERSPR_CONST:
1676         case BWRITERSPR_COLOROUT:
1677         case BWRITERSPR_CONSTBOOL:
1678         case BWRITERSPR_CONSTINT:
1679         case BWRITERSPR_SAMPLER:
1680         case BWRITERSPR_LABEL:
1681         case BWRITERSPR_DEPTHOUT:
1682             d3d9reg = d3d9_register(reg->type);
1683             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1684             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1685             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1686             break;
1687
1688         case BWRITERSPR_PREDICATE:
1689             if(This->version != BWRITERPS_VERSION(2, 1)){
1690                 WARN("Predicate register not supported in ps_2_0\n");
1691                 This->state = E_INVALIDARG;
1692             }
1693             if(reg->regnum) {
1694                 WARN("Predicate register with regnum %u not supported\n",
1695                      reg->regnum);
1696                 This->state = E_INVALIDARG;
1697             }
1698             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1699             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1700             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
1701             break;
1702
1703         default:
1704             WARN("Invalid register type for ps_2_0 shader\n");
1705             This->state = E_INVALIDARG;
1706             return;
1707     }
1708
1709     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1710
1711     token |= d3d9_srcmod(reg->srcmod);
1712     put_dword(buffer, token);
1713 }
1714
1715 static void ps_2_0_dstreg(struct bc_writer *This,
1716                           const struct shader_reg *reg,
1717                           struct bytecode_buffer *buffer,
1718                           DWORD shift, DWORD mod) {
1719     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1720     DWORD d3d9reg;
1721
1722     if(reg->rel_reg) {
1723         WARN("Relative addressing not supported for destination registers\n");
1724         This->state = E_INVALIDARG;
1725         return;
1726     }
1727
1728     switch(reg->type) {
1729         case BWRITERSPR_TEMP: /* 1:1 mapping */
1730         case BWRITERSPR_COLOROUT:
1731         case BWRITERSPR_DEPTHOUT:
1732             d3d9reg = d3d9_register(reg->type);
1733             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1734             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1735             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1736             break;
1737
1738         case BWRITERSPR_PREDICATE:
1739             if(This->version != BWRITERPS_VERSION(2, 1)){
1740                 WARN("Predicate register not supported in ps_2_0\n");
1741                 This->state = E_INVALIDARG;
1742             }
1743             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1744             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1745             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1746             break;
1747
1748         /* texkill uses the input register as a destination parameter */
1749         case BWRITERSPR_INPUT:
1750             token |= map_ps_input(This, reg);
1751             break;
1752
1753         default:
1754             WARN("Invalid dest register type for 2.x pshader\n");
1755             This->state = E_INVALIDARG;
1756             return;
1757     }
1758
1759     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1760     token |= d3d9_dstmod(mod);
1761
1762     token |= d3d9_writemask(reg->u.writemask);
1763     put_dword(buffer, token);
1764 }
1765
1766 static const struct instr_handler_table ps_2_0_handlers[] = {
1767     {BWRITERSIO_ADD,            instr_handler},
1768     {BWRITERSIO_NOP,            instr_handler},
1769     {BWRITERSIO_MOV,            instr_handler},
1770     {BWRITERSIO_SUB,            instr_handler},
1771     {BWRITERSIO_MAD,            instr_handler},
1772     {BWRITERSIO_MUL,            instr_handler},
1773     {BWRITERSIO_RCP,            instr_handler},
1774     {BWRITERSIO_RSQ,            instr_handler},
1775     {BWRITERSIO_DP3,            instr_handler},
1776     {BWRITERSIO_DP4,            instr_handler},
1777     {BWRITERSIO_MIN,            instr_handler},
1778     {BWRITERSIO_MAX,            instr_handler},
1779     {BWRITERSIO_ABS,            instr_handler},
1780     {BWRITERSIO_EXP,            instr_handler},
1781     {BWRITERSIO_LOG,            instr_handler},
1782     {BWRITERSIO_EXPP,           instr_handler},
1783     {BWRITERSIO_LOGP,           instr_handler},
1784     {BWRITERSIO_LRP,            instr_handler},
1785     {BWRITERSIO_FRC,            instr_handler},
1786     {BWRITERSIO_CRS,            instr_handler},
1787     {BWRITERSIO_NRM,            instr_handler},
1788     {BWRITERSIO_SINCOS,         instr_handler},
1789     {BWRITERSIO_M4x4,           instr_handler},
1790     {BWRITERSIO_M4x3,           instr_handler},
1791     {BWRITERSIO_M3x4,           instr_handler},
1792     {BWRITERSIO_M3x3,           instr_handler},
1793     {BWRITERSIO_M3x2,           instr_handler},
1794     {BWRITERSIO_POW,            instr_handler},
1795     {BWRITERSIO_DP2ADD,         instr_handler},
1796     {BWRITERSIO_CMP,            instr_handler},
1797
1798     {BWRITERSIO_TEX,            instr_handler},
1799     {BWRITERSIO_TEXLDP,         instr_handler},
1800     {BWRITERSIO_TEXLDB,         instr_handler},
1801     {BWRITERSIO_TEXKILL,        instr_handler},
1802
1803     {BWRITERSIO_END,            NULL},
1804 };
1805
1806 static const struct bytecode_backend ps_2_0_backend = {
1807     ps_2_header,
1808     end,
1809     ps_2_srcreg,
1810     ps_2_0_dstreg,
1811     sm_2_opcode,
1812     ps_2_0_handlers
1813 };
1814
1815 static const struct instr_handler_table ps_2_x_handlers[] = {
1816     {BWRITERSIO_ADD,            instr_handler},
1817     {BWRITERSIO_NOP,            instr_handler},
1818     {BWRITERSIO_MOV,            instr_handler},
1819     {BWRITERSIO_SUB,            instr_handler},
1820     {BWRITERSIO_MAD,            instr_handler},
1821     {BWRITERSIO_MUL,            instr_handler},
1822     {BWRITERSIO_RCP,            instr_handler},
1823     {BWRITERSIO_RSQ,            instr_handler},
1824     {BWRITERSIO_DP3,            instr_handler},
1825     {BWRITERSIO_DP4,            instr_handler},
1826     {BWRITERSIO_MIN,            instr_handler},
1827     {BWRITERSIO_MAX,            instr_handler},
1828     {BWRITERSIO_ABS,            instr_handler},
1829     {BWRITERSIO_EXP,            instr_handler},
1830     {BWRITERSIO_LOG,            instr_handler},
1831     {BWRITERSIO_EXPP,           instr_handler},
1832     {BWRITERSIO_LOGP,           instr_handler},
1833     {BWRITERSIO_LRP,            instr_handler},
1834     {BWRITERSIO_FRC,            instr_handler},
1835     {BWRITERSIO_CRS,            instr_handler},
1836     {BWRITERSIO_NRM,            instr_handler},
1837     {BWRITERSIO_SINCOS,         instr_handler},
1838     {BWRITERSIO_M4x4,           instr_handler},
1839     {BWRITERSIO_M4x3,           instr_handler},
1840     {BWRITERSIO_M3x4,           instr_handler},
1841     {BWRITERSIO_M3x3,           instr_handler},
1842     {BWRITERSIO_M3x2,           instr_handler},
1843     {BWRITERSIO_POW,            instr_handler},
1844     {BWRITERSIO_DP2ADD,         instr_handler},
1845     {BWRITERSIO_CMP,            instr_handler},
1846
1847     {BWRITERSIO_CALL,           instr_handler},
1848     {BWRITERSIO_CALLNZ,         instr_handler},
1849     {BWRITERSIO_REP,            instr_handler},
1850     {BWRITERSIO_ENDREP,         instr_handler},
1851     {BWRITERSIO_IF,             instr_handler},
1852     {BWRITERSIO_LABEL,          instr_handler},
1853     {BWRITERSIO_IFC,            instr_handler},
1854     {BWRITERSIO_ELSE,           instr_handler},
1855     {BWRITERSIO_ENDIF,          instr_handler},
1856     {BWRITERSIO_BREAK,          instr_handler},
1857     {BWRITERSIO_BREAKC,         instr_handler},
1858     {BWRITERSIO_RET,            instr_handler},
1859
1860     {BWRITERSIO_TEX,            instr_handler},
1861     {BWRITERSIO_TEXLDP,         instr_handler},
1862     {BWRITERSIO_TEXLDB,         instr_handler},
1863     {BWRITERSIO_TEXKILL,        instr_handler},
1864     {BWRITERSIO_DSX,            instr_handler},
1865     {BWRITERSIO_DSY,            instr_handler},
1866
1867     {BWRITERSIO_SETP,           instr_handler},
1868     {BWRITERSIO_BREAKP,         instr_handler},
1869
1870     {BWRITERSIO_TEXLDD,         instr_handler},
1871
1872     {BWRITERSIO_END,            NULL},
1873 };
1874
1875 static const struct bytecode_backend ps_2_x_backend = {
1876     ps_2_header,
1877     end,
1878     ps_2_srcreg,
1879     ps_2_0_dstreg,
1880     sm_2_opcode,
1881     ps_2_x_handlers
1882 };
1883
1884 static void sm_3_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1885     /* Declare the shader type and version */
1886     put_dword(buffer, This->version);
1887
1888     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1889     write_declarations(This, buffer, TRUE, shader->outputs, shader->num_outputs, BWRITERSPR_OUTPUT);
1890     write_constF(shader, buffer, TRUE);
1891     write_constB(shader, buffer, TRUE);
1892     write_constI(shader, buffer, TRUE);
1893     write_samplers(shader, buffer);
1894     return;
1895 }
1896
1897 static void sm_3_srcreg(struct bc_writer *This,
1898                         const struct shader_reg *reg,
1899                         struct bytecode_buffer *buffer) {
1900     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1901     DWORD d3d9reg;
1902
1903     d3d9reg = d3d9_register(reg->type);
1904     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1905     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1906     token |= reg->regnum & D3DSP_REGNUM_MASK;
1907
1908     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK;
1909     token |= d3d9_srcmod(reg->srcmod);
1910
1911     if(reg->rel_reg) {
1912         if(reg->type == BWRITERSPR_CONST && This->version == BWRITERPS_VERSION(3, 0)) {
1913             WARN("c%u[...] is unsupported in ps_3_0\n", reg->regnum);
1914             This->state = E_INVALIDARG;
1915             return;
1916         }
1917         if(((reg->rel_reg->type == BWRITERSPR_ADDR && This->version == BWRITERVS_VERSION(3, 0)) ||
1918            reg->rel_reg->type == BWRITERSPR_LOOP) &&
1919            reg->rel_reg->regnum == 0) {
1920             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1921         } else {
1922             WARN("Unsupported relative addressing register\n");
1923             This->state = E_INVALIDARG;
1924             return;
1925         }
1926     }
1927
1928     put_dword(buffer, token);
1929
1930     /* vs_2_0 and newer write the register containing the index explicitly in the
1931      * binary code
1932      */
1933     if(token & D3DVS_ADDRMODE_RELATIVE) {
1934         sm_3_srcreg(This, reg->rel_reg, buffer);
1935     }
1936 }
1937
1938 static void sm_3_dstreg(struct bc_writer *This,
1939                         const struct shader_reg *reg,
1940                         struct bytecode_buffer *buffer,
1941                         DWORD shift, DWORD mod) {
1942     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1943     DWORD d3d9reg;
1944
1945     if(reg->rel_reg) {
1946         if(This->version == BWRITERVS_VERSION(3, 0) &&
1947            reg->type == BWRITERSPR_OUTPUT) {
1948             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1949         } else {
1950             WARN("Relative addressing not supported for this shader type or register type\n");
1951             This->state = E_INVALIDARG;
1952             return;
1953         }
1954     }
1955
1956     d3d9reg = d3d9_register(reg->type);
1957     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1958     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1959     token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1960
1961     token |= d3d9_dstmod(mod);
1962
1963     token |= d3d9_writemask(reg->u.writemask);
1964     put_dword(buffer, token);
1965
1966     /* vs_2_0 and newer write the register containing the index explicitly in the
1967      * binary code
1968      */
1969     if(token & D3DVS_ADDRMODE_RELATIVE) {
1970         sm_3_srcreg(This, reg->rel_reg, buffer);
1971     }
1972 }
1973
1974 static const struct instr_handler_table vs_3_handlers[] = {
1975     {BWRITERSIO_ADD,            instr_handler},
1976     {BWRITERSIO_NOP,            instr_handler},
1977     {BWRITERSIO_MOV,            instr_handler},
1978     {BWRITERSIO_SUB,            instr_handler},
1979     {BWRITERSIO_MAD,            instr_handler},
1980     {BWRITERSIO_MUL,            instr_handler},
1981     {BWRITERSIO_RCP,            instr_handler},
1982     {BWRITERSIO_RSQ,            instr_handler},
1983     {BWRITERSIO_DP3,            instr_handler},
1984     {BWRITERSIO_DP4,            instr_handler},
1985     {BWRITERSIO_MIN,            instr_handler},
1986     {BWRITERSIO_MAX,            instr_handler},
1987     {BWRITERSIO_SLT,            instr_handler},
1988     {BWRITERSIO_SGE,            instr_handler},
1989     {BWRITERSIO_ABS,            instr_handler},
1990     {BWRITERSIO_EXP,            instr_handler},
1991     {BWRITERSIO_LOG,            instr_handler},
1992     {BWRITERSIO_EXPP,           instr_handler},
1993     {BWRITERSIO_LOGP,           instr_handler},
1994     {BWRITERSIO_DST,            instr_handler},
1995     {BWRITERSIO_LRP,            instr_handler},
1996     {BWRITERSIO_FRC,            instr_handler},
1997     {BWRITERSIO_CRS,            instr_handler},
1998     {BWRITERSIO_SGN,            instr_handler},
1999     {BWRITERSIO_NRM,            instr_handler},
2000     {BWRITERSIO_SINCOS,         instr_handler},
2001     {BWRITERSIO_M4x4,           instr_handler},
2002     {BWRITERSIO_M4x3,           instr_handler},
2003     {BWRITERSIO_M3x4,           instr_handler},
2004     {BWRITERSIO_M3x3,           instr_handler},
2005     {BWRITERSIO_M3x2,           instr_handler},
2006     {BWRITERSIO_LIT,            instr_handler},
2007     {BWRITERSIO_POW,            instr_handler},
2008     {BWRITERSIO_MOVA,           instr_handler},
2009
2010     {BWRITERSIO_CALL,           instr_handler},
2011     {BWRITERSIO_CALLNZ,         instr_handler},
2012     {BWRITERSIO_REP,            instr_handler},
2013     {BWRITERSIO_ENDREP,         instr_handler},
2014     {BWRITERSIO_IF,             instr_handler},
2015     {BWRITERSIO_LABEL,          instr_handler},
2016     {BWRITERSIO_IFC,            instr_handler},
2017     {BWRITERSIO_ELSE,           instr_handler},
2018     {BWRITERSIO_ENDIF,          instr_handler},
2019     {BWRITERSIO_BREAK,          instr_handler},
2020     {BWRITERSIO_BREAKC,         instr_handler},
2021     {BWRITERSIO_LOOP,           instr_handler},
2022     {BWRITERSIO_RET,            instr_handler},
2023     {BWRITERSIO_ENDLOOP,        instr_handler},
2024
2025     {BWRITERSIO_SETP,           instr_handler},
2026     {BWRITERSIO_BREAKP,         instr_handler},
2027     {BWRITERSIO_TEXLDL,         instr_handler},
2028
2029     {BWRITERSIO_END,            NULL},
2030 };
2031
2032 static const struct bytecode_backend vs_3_backend = {
2033     sm_3_header,
2034     end,
2035     sm_3_srcreg,
2036     sm_3_dstreg,
2037     sm_2_opcode,
2038     vs_3_handlers
2039 };
2040
2041 static const struct instr_handler_table ps_3_handlers[] = {
2042     {BWRITERSIO_ADD,            instr_handler},
2043     {BWRITERSIO_NOP,            instr_handler},
2044     {BWRITERSIO_MOV,            instr_handler},
2045     {BWRITERSIO_SUB,            instr_handler},
2046     {BWRITERSIO_MAD,            instr_handler},
2047     {BWRITERSIO_MUL,            instr_handler},
2048     {BWRITERSIO_RCP,            instr_handler},
2049     {BWRITERSIO_RSQ,            instr_handler},
2050     {BWRITERSIO_DP3,            instr_handler},
2051     {BWRITERSIO_DP4,            instr_handler},
2052     {BWRITERSIO_MIN,            instr_handler},
2053     {BWRITERSIO_MAX,            instr_handler},
2054     {BWRITERSIO_ABS,            instr_handler},
2055     {BWRITERSIO_EXP,            instr_handler},
2056     {BWRITERSIO_LOG,            instr_handler},
2057     {BWRITERSIO_EXPP,           instr_handler},
2058     {BWRITERSIO_LOGP,           instr_handler},
2059     {BWRITERSIO_LRP,            instr_handler},
2060     {BWRITERSIO_FRC,            instr_handler},
2061     {BWRITERSIO_CRS,            instr_handler},
2062     {BWRITERSIO_NRM,            instr_handler},
2063     {BWRITERSIO_SINCOS,         instr_handler},
2064     {BWRITERSIO_M4x4,           instr_handler},
2065     {BWRITERSIO_M4x3,           instr_handler},
2066     {BWRITERSIO_M3x4,           instr_handler},
2067     {BWRITERSIO_M3x3,           instr_handler},
2068     {BWRITERSIO_M3x2,           instr_handler},
2069     {BWRITERSIO_POW,            instr_handler},
2070     {BWRITERSIO_DP2ADD,         instr_handler},
2071     {BWRITERSIO_CMP,            instr_handler},
2072
2073     {BWRITERSIO_CALL,           instr_handler},
2074     {BWRITERSIO_CALLNZ,         instr_handler},
2075     {BWRITERSIO_REP,            instr_handler},
2076     {BWRITERSIO_ENDREP,         instr_handler},
2077     {BWRITERSIO_IF,             instr_handler},
2078     {BWRITERSIO_LABEL,          instr_handler},
2079     {BWRITERSIO_IFC,            instr_handler},
2080     {BWRITERSIO_ELSE,           instr_handler},
2081     {BWRITERSIO_ENDIF,          instr_handler},
2082     {BWRITERSIO_BREAK,          instr_handler},
2083     {BWRITERSIO_BREAKC,         instr_handler},
2084     {BWRITERSIO_LOOP,           instr_handler},
2085     {BWRITERSIO_RET,            instr_handler},
2086     {BWRITERSIO_ENDLOOP,        instr_handler},
2087
2088     {BWRITERSIO_SETP,           instr_handler},
2089     {BWRITERSIO_BREAKP,         instr_handler},
2090     {BWRITERSIO_TEXLDL,         instr_handler},
2091
2092     {BWRITERSIO_TEX,            instr_handler},
2093     {BWRITERSIO_TEXLDP,         instr_handler},
2094     {BWRITERSIO_TEXLDB,         instr_handler},
2095     {BWRITERSIO_TEXKILL,        instr_handler},
2096     {BWRITERSIO_DSX,            instr_handler},
2097     {BWRITERSIO_DSY,            instr_handler},
2098     {BWRITERSIO_TEXLDD,         instr_handler},
2099
2100     {BWRITERSIO_END,            NULL},
2101 };
2102
2103 static const struct bytecode_backend ps_3_backend = {
2104     sm_3_header,
2105     end,
2106     sm_3_srcreg,
2107     sm_3_dstreg,
2108     sm_2_opcode,
2109     ps_3_handlers
2110 };
2111
2112 static void init_vs10_dx9_writer(struct bc_writer *writer) {
2113     TRACE("Creating DirectX9 vertex shader 1.0 writer\n");
2114     writer->funcs = &vs_1_x_backend;
2115 }
2116
2117 static void init_vs11_dx9_writer(struct bc_writer *writer) {
2118     TRACE("Creating DirectX9 vertex shader 1.1 writer\n");
2119     writer->funcs = &vs_1_x_backend;
2120 }
2121
2122 static void init_vs20_dx9_writer(struct bc_writer *writer) {
2123     TRACE("Creating DirectX9 vertex shader 2.0 writer\n");
2124     writer->funcs = &vs_2_0_backend;
2125 }
2126
2127 static void init_vs2x_dx9_writer(struct bc_writer *writer) {
2128     TRACE("Creating DirectX9 vertex shader 2.x writer\n");
2129     writer->funcs = &vs_2_x_backend;
2130 }
2131
2132 static void init_vs30_dx9_writer(struct bc_writer *writer) {
2133     TRACE("Creating DirectX9 vertex shader 3.0 writer\n");
2134     writer->funcs = &vs_3_backend;
2135 }
2136
2137 static void init_ps10_dx9_writer(struct bc_writer *writer) {
2138     TRACE("Creating DirectX9 pixel shader 1.0 writer\n");
2139     writer->funcs = &ps_1_0123_backend;
2140 }
2141
2142 static void init_ps11_dx9_writer(struct bc_writer *writer) {
2143     TRACE("Creating DirectX9 pixel shader 1.1 writer\n");
2144     writer->funcs = &ps_1_0123_backend;
2145 }
2146
2147 static void init_ps12_dx9_writer(struct bc_writer *writer) {
2148     TRACE("Creating DirectX9 pixel shader 1.2 writer\n");
2149     writer->funcs = &ps_1_0123_backend;
2150 }
2151
2152 static void init_ps13_dx9_writer(struct bc_writer *writer) {
2153     TRACE("Creating DirectX9 pixel shader 1.3 writer\n");
2154     writer->funcs = &ps_1_0123_backend;
2155 }
2156
2157 static void init_ps14_dx9_writer(struct bc_writer *writer) {
2158     TRACE("Creating DirectX9 pixel shader 1.4 writer\n");
2159     writer->funcs = &ps_1_4_backend;
2160 }
2161
2162 static void init_ps20_dx9_writer(struct bc_writer *writer) {
2163     TRACE("Creating DirectX9 pixel shader 2.0 writer\n");
2164     writer->funcs = &ps_2_0_backend;
2165 }
2166
2167 static void init_ps2x_dx9_writer(struct bc_writer *writer) {
2168     TRACE("Creating DirectX9 pixel shader 2.x writer\n");
2169     writer->funcs = &ps_2_x_backend;
2170 }
2171
2172 static void init_ps30_dx9_writer(struct bc_writer *writer) {
2173     TRACE("Creating DirectX9 pixel shader 3.0 writer\n");
2174     writer->funcs = &ps_3_backend;
2175 }
2176
2177 static struct bc_writer *create_writer(DWORD version, DWORD dxversion) {
2178     struct bc_writer *ret = asm_alloc(sizeof(*ret));
2179
2180     if(!ret) {
2181         WARN("Failed to allocate a bytecode writer instance\n");
2182         return NULL;
2183     }
2184
2185     switch(version) {
2186         case BWRITERVS_VERSION(1, 0):
2187             if(dxversion != 9) {
2188                 WARN("Unsupported dxversion for vertex shader 1.0 requested: %u\n", dxversion);
2189                 goto fail;
2190             }
2191             init_vs10_dx9_writer(ret);
2192             break;
2193         case BWRITERVS_VERSION(1, 1):
2194             if(dxversion != 9) {
2195                 WARN("Unsupported dxversion for vertex shader 1.1 requested: %u\n", dxversion);
2196                 goto fail;
2197             }
2198             init_vs11_dx9_writer(ret);
2199             break;
2200         case BWRITERVS_VERSION(2, 0):
2201             if(dxversion != 9) {
2202                 WARN("Unsupported dxversion for vertex shader 2.0 requested: %u\n", dxversion);
2203                 goto fail;
2204             }
2205             init_vs20_dx9_writer(ret);
2206             break;
2207         case BWRITERVS_VERSION(2, 1):
2208             if(dxversion != 9) {
2209                 WARN("Unsupported dxversion for vertex shader 2.x requested: %u\n", dxversion);
2210                 goto fail;
2211             }
2212             init_vs2x_dx9_writer(ret);
2213             break;
2214         case BWRITERVS_VERSION(3, 0):
2215             if(dxversion != 9) {
2216                 WARN("Unsupported dxversion for vertex shader 3.0 requested: %u\n", dxversion);
2217                 goto fail;
2218             }
2219             init_vs30_dx9_writer(ret);
2220             break;
2221
2222         case BWRITERPS_VERSION(1, 0):
2223             if(dxversion != 9) {
2224                 WARN("Unsupported dxversion for pixel shader 1.0 requested: %u\n", dxversion);
2225                 goto fail;
2226             }
2227             init_ps10_dx9_writer(ret);
2228             break;
2229         case BWRITERPS_VERSION(1, 1):
2230             if(dxversion != 9) {
2231                 WARN("Unsupported dxversion for pixel shader 1.1 requested: %u\n", dxversion);
2232                 goto fail;
2233             }
2234             init_ps11_dx9_writer(ret);
2235             break;
2236         case BWRITERPS_VERSION(1, 2):
2237             if(dxversion != 9) {
2238                 WARN("Unsupported dxversion for pixel shader 1.2 requested: %u\n", dxversion);
2239                 goto fail;
2240             }
2241             init_ps12_dx9_writer(ret);
2242             break;
2243         case BWRITERPS_VERSION(1, 3):
2244             if(dxversion != 9) {
2245                 WARN("Unsupported dxversion for pixel shader 1.3 requested: %u\n", dxversion);
2246                 goto fail;
2247             }
2248             init_ps13_dx9_writer(ret);
2249             break;
2250         case BWRITERPS_VERSION(1, 4):
2251             if(dxversion != 9) {
2252                 WARN("Unsupported dxversion for pixel shader 1.4 requested: %u\n", dxversion);
2253                 goto fail;
2254             }
2255             init_ps14_dx9_writer(ret);
2256             break;
2257
2258         case BWRITERPS_VERSION(2, 0):
2259             if(dxversion != 9) {
2260                 WARN("Unsupported dxversion for pixel shader 2.0 requested: %u\n", dxversion);
2261                 goto fail;
2262             }
2263             init_ps20_dx9_writer(ret);
2264             break;
2265
2266         case BWRITERPS_VERSION(2, 1):
2267             if(dxversion != 9) {
2268                 WARN("Unsupported dxversion for pixel shader 2.x requested: %u\n", dxversion);
2269                 goto fail;
2270             }
2271             init_ps2x_dx9_writer(ret);
2272             break;
2273
2274         case BWRITERPS_VERSION(3, 0):
2275             if(dxversion != 9) {
2276                 WARN("Unsupported dxversion for pixel shader 3.0 requested: %u\n", dxversion);
2277                 goto fail;
2278             }
2279             init_ps30_dx9_writer(ret);
2280             break;
2281
2282         default:
2283             WARN("Unexpected shader version requested: %08x\n", version);
2284             goto fail;
2285     }
2286     ret->version = version;
2287     return ret;
2288
2289 fail:
2290     asm_free(ret);
2291     return NULL;
2292 }
2293
2294 static HRESULT call_instr_handler(struct bc_writer *writer,
2295                                   const struct instruction *instr,
2296                                   struct bytecode_buffer *buffer) {
2297     DWORD i=0;
2298
2299     while(writer->funcs->instructions[i].opcode != BWRITERSIO_END) {
2300         if(instr->opcode == writer->funcs->instructions[i].opcode) {
2301             if(!writer->funcs->instructions[i].func) {
2302                 WARN("Opcode %u not supported by this profile\n", instr->opcode);
2303                 return E_INVALIDARG;
2304             }
2305             writer->funcs->instructions[i].func(writer, instr, buffer);
2306             return S_OK;
2307         }
2308         i++;
2309     }
2310
2311     FIXME("Unhandled instruction %u - %s\n", instr->opcode,
2312           debug_print_opcode(instr->opcode));
2313     return E_INVALIDARG;
2314 }
2315
/* SlWriteBytecode (wineshader.@)
 *
 * Writes shader version specific bytecode from the shader passed in.
 * The returned bytecode can be passed to the Direct3D runtime like
 * IDirect3DDevice9::Create*Shader.
 *
 * Parameters:
 *  shader: Shader to translate into bytecode. The shader version to
 *          generate (d3d version token) is taken from shader->version
 *  dxversion: DirectX version the code targets
 *  result: the resulting shader bytecode
 *
 * Return values:
 *  S_OK on success
 */
2331 DWORD SlWriteBytecode(const struct bwriter_shader *shader, int dxversion, DWORD **result) {
2332     struct bc_writer *writer;
2333     struct bytecode_buffer *buffer = NULL;
2334     HRESULT hr;
2335     unsigned int i;
2336
2337     if(!shader){
2338         ERR("NULL shader structure, aborting\n");
2339         return E_FAIL;
2340     }
2341     writer = create_writer(shader->version, dxversion);
2342     *result = NULL;
2343
2344     if(!writer) {
2345         WARN("Could not create a bytecode writer instance. Either unsupported version\n");
2346         WARN("or out of memory\n");
2347         hr = E_FAIL;
2348         goto error;
2349     }
2350
2351     buffer = allocate_buffer();
2352     if(!buffer) {
2353         WARN("Failed to allocate a buffer for the shader bytecode\n");
2354         hr = E_FAIL;
2355         goto error;
2356     }
2357
2358     writer->funcs->header(writer, shader, buffer);
2359     if(FAILED(writer->state)) {
2360         hr = writer->state;
2361         goto error;
2362     }
2363
2364     for(i = 0; i < shader->num_instrs; i++) {
2365         hr = call_instr_handler(writer, shader->instr[i], buffer);
2366         if(FAILED(hr)) {
2367             goto error;
2368         }
2369     }
2370
2371     if(FAILED(writer->state)) {
2372         hr = writer->state;
2373         goto error;
2374     }
2375
2376     writer->funcs->end(writer, shader, buffer);
2377
2378     if(FAILED(buffer->state)) {
2379         hr = buffer->state;
2380         goto error;
2381     }
2382
2383     /* Cut off unneeded memory from the result buffer */
2384     *result = asm_realloc(buffer->data,
2385                          sizeof(DWORD) * buffer->size);
2386     if(!*result) {
2387         *result = buffer->data;
2388     }
2389     buffer->data = NULL;
2390     hr = S_OK;
2391
2392 error:
2393     if(buffer) {
2394         asm_free(buffer->data);
2395         asm_free(buffer);
2396     }
2397     asm_free(writer);
2398     return hr;
2399 }
2400
2401 void SlDeleteShader(struct bwriter_shader *shader) {
2402     unsigned int i, j;
2403
2404     TRACE("Deleting shader %p\n", shader);
2405
2406     for(i = 0; i < shader->num_cf; i++) {
2407         asm_free(shader->constF[i]);
2408     }
2409     asm_free(shader->constF);
2410     for(i = 0; i < shader->num_ci; i++) {
2411         asm_free(shader->constI[i]);
2412     }
2413     asm_free(shader->constI);
2414     for(i = 0; i < shader->num_cb; i++) {
2415         asm_free(shader->constB[i]);
2416     }
2417     asm_free(shader->constB);
2418
2419     asm_free(shader->inputs);
2420     asm_free(shader->outputs);
2421     asm_free(shader->samplers);
2422
2423     for(i = 0; i < shader->num_instrs; i++) {
2424         for(j = 0; j < shader->instr[i]->num_srcs; j++) {
2425             asm_free(shader->instr[i]->src[j].rel_reg);
2426         }
2427         asm_free(shader->instr[i]->src);
2428         asm_free(shader->instr[i]);
2429     }
2430     asm_free(shader->instr);
2431
2432     asm_free(shader);
2433 }