d3dx9: Shader assembler ps_2_x support.
[wine] / dlls / d3dx9_36 / bytecodewriter.c
1 /*
2  * Direct3D bytecode output functions
3  *
4  * Copyright 2008 Stefan Dösinger
5  * Copyright 2009 Matteo Bruni
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20  *
21  */
22
23 #include "config.h"
24 #include "wine/port.h"
25 #include "wine/debug.h"
26
27 #include "d3dx9_36_private.h"
28
29 WINE_DEFAULT_DEBUG_CHANNEL(asmshader);
30
31 /****************************************************************
32  * General assembler shader construction helper routines follow *
33  ****************************************************************/
34 /* struct instruction *alloc_instr
35  *
36  * Allocates a new instruction structure with srcs registers
37  *
38  * Parameters:
39  *  srcs: Number of source registers to allocate
40  *
41  * Returns:
42  *  A pointer to the allocated instruction structure
43  *  NULL in case of an allocation failure
44  */
45 struct instruction *alloc_instr(unsigned int srcs) {
46     struct instruction *ret = asm_alloc(sizeof(*ret));
47     if(!ret) {
48         ERR("Failed to allocate memory for an instruction structure\n");
49         return NULL;
50     }
51
52     if(srcs) {
53         ret->src = asm_alloc(srcs * sizeof(*ret->src));
54         if(!ret->src) {
55             ERR("Failed to allocate memory for instruction registers\n");
56             asm_free(ret);
57             return NULL;
58         }
59         ret->num_srcs = srcs;
60     }
61     return ret;
62 }
63
64 /* void add_instruction
65  *
66  * Adds a new instruction to the shader's instructions array and grows the instruction array
67  * if needed.
68  *
69  * The function does NOT copy the instruction structure. Make sure not to release the
70  * instruction or any of its substructures like registers.
71  *
72  * Parameters:
73  *  shader: Shader to add the instruction to
74  *  instr: Instruction to add to the shader
75  */
76 BOOL add_instruction(struct bwriter_shader *shader, struct instruction *instr) {
77     struct instruction      **new_instructions;
78
79     if(!shader) return FALSE;
80
81     if(shader->instr_alloc_size == 0) {
82         shader->instr = asm_alloc(sizeof(*shader->instr) * INSTRARRAY_INITIAL_SIZE);
83         if(!shader->instr) {
84             ERR("Failed to allocate the shader instruction array\n");
85             return FALSE;
86         }
87         shader->instr_alloc_size = INSTRARRAY_INITIAL_SIZE;
88     } else if(shader->instr_alloc_size == shader->num_instrs) {
89         new_instructions = asm_realloc(shader->instr,
90                                        sizeof(*shader->instr) * (shader->instr_alloc_size) * 2);
91         if(!new_instructions) {
92             ERR("Failed to grow the shader instruction array\n");
93             return FALSE;
94         }
95         shader->instr = new_instructions;
96         shader->instr_alloc_size = shader->instr_alloc_size * 2;
97     } else if(shader->num_instrs > shader->instr_alloc_size) {
98         ERR("More instructions than allocated. This should not happen\n");
99         return FALSE;
100     }
101
102     shader->instr[shader->num_instrs] = instr;
103     shader->num_instrs++;
104     return TRUE;
105 }
106
107 BOOL add_constF(struct bwriter_shader *shader, DWORD reg, float x, float y, float z, float w) {
108     struct constant *newconst;
109
110     if(shader->num_cf) {
111         struct constant **newarray;
112         newarray = asm_realloc(shader->constF,
113                                sizeof(*shader->constF) * (shader->num_cf + 1));
114         if(!newarray) {
115             ERR("Failed to grow the constants array\n");
116             return FALSE;
117         }
118         shader->constF = newarray;
119     } else {
120         shader->constF = asm_alloc(sizeof(*shader->constF));
121         if(!shader->constF) {
122             ERR("Failed to allocate the constants array\n");
123             return FALSE;
124         }
125     }
126
127     newconst = asm_alloc(sizeof(*newconst));
128     if(!newconst) {
129         ERR("Failed to allocate a new constant\n");
130         return FALSE;
131     }
132     newconst->regnum = reg;
133     newconst->value[0].f = x;
134     newconst->value[1].f = y;
135     newconst->value[2].f = z;
136     newconst->value[3].f = w;
137     shader->constF[shader->num_cf] = newconst;
138
139     shader->num_cf++;
140     return TRUE;
141 }
142
143 BOOL add_constI(struct bwriter_shader *shader, DWORD reg, INT x, INT y, INT z, INT w) {
144     struct constant *newconst;
145
146     if(shader->num_ci) {
147         struct constant **newarray;
148         newarray = asm_realloc(shader->constI,
149                                sizeof(*shader->constI) * (shader->num_ci + 1));
150         if(!newarray) {
151             ERR("Failed to grow the constants array\n");
152             return FALSE;
153         }
154         shader->constI = newarray;
155     } else {
156         shader->constI = asm_alloc(sizeof(*shader->constI));
157         if(!shader->constI) {
158             ERR("Failed to allocate the constants array\n");
159             return FALSE;
160         }
161     }
162
163     newconst = asm_alloc(sizeof(*newconst));
164     if(!newconst) {
165         ERR("Failed to allocate a new constant\n");
166         return FALSE;
167     }
168     newconst->regnum = reg;
169     newconst->value[0].i = x;
170     newconst->value[1].i = y;
171     newconst->value[2].i = z;
172     newconst->value[3].i = w;
173     shader->constI[shader->num_ci] = newconst;
174
175     shader->num_ci++;
176     return TRUE;
177 }
178
179 BOOL add_constB(struct bwriter_shader *shader, DWORD reg, BOOL x) {
180     struct constant *newconst;
181
182     if(shader->num_cb) {
183         struct constant **newarray;
184         newarray = asm_realloc(shader->constB,
185                                sizeof(*shader->constB) * (shader->num_cb + 1));
186         if(!newarray) {
187             ERR("Failed to grow the constants array\n");
188             return FALSE;
189         }
190         shader->constB = newarray;
191     } else {
192         shader->constB = asm_alloc(sizeof(*shader->constB));
193         if(!shader->constB) {
194             ERR("Failed to allocate the constants array\n");
195             return FALSE;
196         }
197     }
198
199     newconst = asm_alloc(sizeof(*newconst));
200     if(!newconst) {
201         ERR("Failed to allocate a new constant\n");
202         return FALSE;
203     }
204     newconst->regnum = reg;
205     newconst->value[0].b = x;
206     shader->constB[shader->num_cb] = newconst;
207
208     shader->num_cb++;
209     return TRUE;
210 }
211
212 BOOL record_declaration(struct bwriter_shader *shader, DWORD usage, DWORD usage_idx, BOOL output, DWORD regnum, DWORD writemask) {
213     unsigned int *num;
214     struct declaration **decl;
215     unsigned int i;
216
217     if(!shader) return FALSE;
218
219     if(output) {
220         num = &shader->num_outputs;
221         decl = &shader->outputs;
222     } else {
223         num = &shader->num_inputs;
224         decl = &shader->inputs;
225     }
226
227     if(*num == 0) {
228         *decl = asm_alloc(sizeof(**decl));
229         if(!*decl) {
230             ERR("Error allocating declarations array\n");
231             return FALSE;
232         }
233     } else {
234         struct declaration *newdecl;
235         for(i = 0; i < *num; i++) {
236             if((*decl)[i].regnum == regnum && ((*decl)[i].writemask & writemask)) {
237                 WARN("Declaration of register %u already exists, writemask match 0x%x\n",
238                       regnum, (*decl)[i].writemask & writemask);
239             }
240         }
241
242         newdecl = asm_realloc(*decl,
243                               sizeof(**decl) * ((*num) + 1));
244         if(!newdecl) {
245             ERR("Error reallocating declarations array\n");
246             return FALSE;
247         }
248         *decl = newdecl;
249     }
250     (*decl)[*num].usage = usage;
251     (*decl)[*num].usage_idx = usage_idx;
252     (*decl)[*num].regnum = regnum;
253     (*decl)[*num].writemask = writemask;
254     (*num)++;
255
256     return TRUE;
257 }
258
259 BOOL record_sampler(struct bwriter_shader *shader, DWORD samptype, DWORD regnum) {
260     unsigned int i;
261
262     if(!shader) return FALSE;
263
264     if(shader->num_samplers == 0) {
265         shader->samplers = asm_alloc(sizeof(*shader->samplers));
266         if(!shader->samplers) {
267             ERR("Error allocating samplers array\n");
268             return FALSE;
269         }
270     } else {
271         struct samplerdecl *newarray;
272
273         for(i = 0; i < shader->num_samplers; i++) {
274             if(shader->samplers[i].regnum == regnum) {
275                 WARN("Sampler %u already declared\n", regnum);
276                 /* This is not an error as far as the assembler is concerned.
277                  * Direct3D might refuse to load the compiled shader though
278                  */
279             }
280         }
281
282         newarray = asm_realloc(shader->samplers,
283                                sizeof(*shader->samplers) * (shader->num_samplers + 1));
284         if(!newarray) {
285             ERR("Error reallocating samplers array\n");
286             return FALSE;
287         }
288         shader->samplers = newarray;
289     }
290
291     shader->samplers[shader->num_samplers].type = samptype;
292     shader->samplers[shader->num_samplers].regnum = regnum;
293     shader->num_samplers++;
294     return TRUE;
295 }
296
297
298 /* shader bytecode buffer manipulation functions.
299  * allocate_buffer creates a new buffer structure, put_dword adds a new
300  * DWORD to the buffer. In the rare case of a memory allocation failure
301  * when trying to grow the buffer a flag is set in the buffer to mark it
302  * invalid. This avoids return value checking and passing in many places
303  */
304 static struct bytecode_buffer *allocate_buffer(void) {
305     struct bytecode_buffer *ret;
306
307     ret = asm_alloc(sizeof(*ret));
308     if(!ret) return NULL;
309
310     ret->alloc_size = BYTECODEBUFFER_INITIAL_SIZE;
311     ret->data = asm_alloc(sizeof(DWORD) * ret->alloc_size);
312     if(!ret->data) {
313         asm_free(ret);
314         return NULL;
315     }
316     ret->state = S_OK;
317     return ret;
318 }
319
320 static void put_dword(struct bytecode_buffer *buffer, DWORD value) {
321     if(FAILED(buffer->state)) return;
322
323     if(buffer->alloc_size == buffer->size) {
324         DWORD *newarray;
325         buffer->alloc_size *= 2;
326         newarray = asm_realloc(buffer->data,
327                                sizeof(DWORD) * buffer->alloc_size);
328         if(!newarray) {
329             ERR("Failed to grow the buffer data memory\n");
330             buffer->state = E_OUTOFMEMORY;
331             return;
332         }
333         buffer->data = newarray;
334     }
335     buffer->data[buffer->size++] = value;
336 }
337
338 /******************************************************
339  * Implementation of the writer functions starts here *
340  ******************************************************/
341 static void write_declarations(struct bytecode_buffer *buffer, BOOL len,
342                                const struct declaration *decls, unsigned int num, DWORD type) {
343     DWORD i;
344     DWORD instr_dcl = D3DSIO_DCL;
345     DWORD token;
346
347     if(len) {
348         instr_dcl |= 2 << D3DSI_INSTLENGTH_SHIFT;
349     }
350
351     for(i = 0; i < num; i++) {
352         /* Write the DCL instruction */
353         put_dword(buffer, instr_dcl);
354
355         /* Write the usage and index */
356         token = (1 << 31); /* Bit 31 of non-instruction opcodes is 1 */
357         token |= (decls[i].usage << D3DSP_DCL_USAGE_SHIFT) & D3DSP_DCL_USAGE_MASK;
358         token |= (decls[i].usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT) & D3DSP_DCL_USAGEINDEX_MASK;
359         put_dword(buffer, token);
360
361         /* Write the dest register */
362         token = (1 << 31); /* Bit 31 of non-instruction opcodes is 1 */
363         token |= (type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
364         token |= (d3d9_writemask(decls[i].writemask)) & D3DSP_WRITEMASK_ALL;
365         token |= decls[i].regnum & D3DSP_REGNUM_MASK;
366         put_dword(buffer, token);
367     }
368 }
369
370 static void write_const(struct constant **consts, int num, DWORD opcode, DWORD reg_type, struct bytecode_buffer *buffer, BOOL len) {
371     DWORD i;
372     DWORD instr_def = opcode;
373     const DWORD reg = (1<<31) |
374                       ((reg_type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
375                       ((reg_type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
376                       D3DSP_WRITEMASK_ALL;
377
378     if(len) {
379         if(opcode == D3DSIO_DEFB)
380             instr_def |= 2 << D3DSI_INSTLENGTH_SHIFT;
381         else
382             instr_def |= 5 << D3DSI_INSTLENGTH_SHIFT;
383     }
384
385     for(i = 0; i < num; i++) {
386         /* Write the DEF instruction */
387         put_dword(buffer, instr_def);
388
389         put_dword(buffer, reg | (consts[i]->regnum & D3DSP_REGNUM_MASK));
390         put_dword(buffer, consts[i]->value[0].d);
391         if(opcode != D3DSIO_DEFB) {
392             put_dword(buffer, consts[i]->value[1].d);
393             put_dword(buffer, consts[i]->value[2].d);
394             put_dword(buffer, consts[i]->value[3].d);
395         }
396     }
397 }
398
399 static void write_constF(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
400     write_const(shader->constF, shader->num_cf, D3DSIO_DEF, D3DSPR_CONST, buffer, len);
401 }
402
403 static HRESULT vs_find_builtin_varyings(struct bc_writer *This, const struct bwriter_shader *shader) {
404     DWORD i;
405     DWORD usage, usage_idx, writemask, regnum;
406
407     for(i = 0; i < shader->num_outputs; i++) {
408         usage = shader->outputs[i].usage;
409         usage_idx = shader->outputs[i].usage_idx;
410         writemask = shader->outputs[i].writemask;
411         regnum = shader->outputs[i].regnum;
412
413         switch(usage) {
414             case BWRITERDECLUSAGE_POSITION:
415             case BWRITERDECLUSAGE_POSITIONT:
416                 if(usage_idx > 0) {
417                     WARN("dcl_position%u not supported in sm 1/2 shaders\n", usage_idx);
418                     return E_INVALIDARG;
419                 }
420                 TRACE("o%u is oPos\n", regnum);
421                 This->oPos_regnum = regnum;
422                 break;
423
424             case BWRITERDECLUSAGE_COLOR:
425                 if(usage_idx > 1) {
426                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
427                     return E_INVALIDARG;
428                 }
429                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
430                     WARN("Only WRITEMASK_ALL is supported on color in sm 1/2\n");
431                     return E_INVALIDARG;
432                 }
433                 TRACE("o%u is oD%u\n", regnum, usage_idx);
434                 This->oD_regnum[usage_idx] = regnum;
435                 break;
436
437             case BWRITERDECLUSAGE_TEXCOORD:
438                 if(usage_idx > 8) {
439                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
440                     return E_INVALIDARG;
441                 }
442                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
443                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
444                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
445                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
446                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
447                     return E_INVALIDARG;
448                 }
449                 TRACE("o%u is oT%u\n", regnum, usage_idx);
450                 This->oT_regnum[usage_idx] = regnum;
451                 break;
452
453             case BWRITERDECLUSAGE_PSIZE:
454                 if(usage_idx > 0) {
455                     WARN("dcl_psize%u not supported in sm 1/2 shaders\n", usage_idx);
456                     return E_INVALIDARG;
457                 }
458                 TRACE("o%u writemask 0x%08x is oPts\n", regnum, writemask);
459                 This->oPts_regnum = regnum;
460                 This->oPts_mask = writemask;
461                 break;
462
463             case BWRITERDECLUSAGE_FOG:
464                 if(usage_idx > 0) {
465                     WARN("dcl_fog%u not supported in sm 1 shaders\n", usage_idx);
466                     return E_INVALIDARG;
467                 }
468                 if(writemask != BWRITERSP_WRITEMASK_0 && writemask != BWRITERSP_WRITEMASK_1 &&
469                    writemask != BWRITERSP_WRITEMASK_2 && writemask != BWRITERSP_WRITEMASK_3) {
470                     WARN("Unsupported fog writemask\n");
471                     return E_INVALIDARG;
472                 }
473                 TRACE("o%u writemask 0x%08x is oFog\n", regnum, writemask);
474                 This->oFog_regnum = regnum;
475                 This->oFog_mask = writemask;
476                 break;
477
478             default:
479                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
480                 return E_INVALIDARG;
481         }
482     }
483
484     return S_OK;
485 }
486
487 static HRESULT find_ps_builtin_semantics(struct bc_writer *This,
488                                          const struct bwriter_shader *shader,
489                                          DWORD texcoords) {
490     DWORD i;
491     DWORD usage, usage_idx, writemask, regnum;
492
493     This->v_regnum[0] = -1; This->v_regnum[1] = -1;
494     for(i = 0; i < 8; i++) This->t_regnum[i] = -1;
495
496     for(i = 0; i < shader->num_inputs; i++) {
497         usage = shader->inputs[i].usage;
498         usage_idx = shader->inputs[i].usage_idx;
499         writemask = shader->inputs[i].writemask;
500         regnum = shader->inputs[i].regnum;
501
502         switch(usage) {
503             case BWRITERDECLUSAGE_COLOR:
504                 if(usage_idx > 1) {
505                     WARN("dcl_color%u not supported in sm 1 shaders\n", usage_idx);
506                     return E_INVALIDARG;
507                 }
508                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
509                     WARN("Only WRITEMASK_ALL is supported on color in sm 1\n");
510                     return E_INVALIDARG;
511                 }
512                 TRACE("v%u is v%u\n", regnum, usage_idx);
513                 This->v_regnum[usage_idx] = regnum;
514                 break;
515
516             case BWRITERDECLUSAGE_TEXCOORD:
517                 if(usage_idx > texcoords) {
518                     WARN("dcl_texcoord%u not supported in this shader version\n", usage_idx);
519                     return E_INVALIDARG;
520                 }
521                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
522                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
523                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
524                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
525                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
526                 } else {
527                     writemask = BWRITERSP_WRITEMASK_ALL;
528                 }
529                 TRACE("v%u is t%u\n", regnum, usage_idx);
530                 This->t_regnum[usage_idx] = regnum;
531                 break;
532
533             default:
534                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
535                 return E_INVALIDARG;
536         }
537     }
538
539     return S_OK;
540 }
541
542 static void end(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
543     put_dword(buffer, D3DSIO_END);
544 }
545
546 static DWORD map_vs_output(struct bc_writer *This, DWORD regnum, DWORD mask, DWORD *has_components) {
547     DWORD token = 0;
548     DWORD i;
549
550     *has_components = TRUE;
551     if(regnum == This->oPos_regnum) {
552         token |= (D3DSPR_RASTOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
553         token |= D3DSRO_POSITION & D3DSP_REGNUM_MASK; /* No shift */
554         return token;
555     }
556     if(regnum == This->oFog_regnum && mask == This->oFog_mask) {
557         token |= (D3DSPR_RASTOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
558         token |= D3DSRO_FOG & D3DSP_REGNUM_MASK; /* No shift */
559         token |= D3DSP_WRITEMASK_ALL;
560         *has_components = FALSE;
561         return token;
562     }
563     if(regnum == This->oPts_regnum && mask == This->oPts_mask) {
564         token |= (D3DSPR_RASTOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
565         token |= D3DSRO_POINT_SIZE & D3DSP_REGNUM_MASK; /* No shift */
566         token |= D3DSP_WRITEMASK_ALL;
567         *has_components = FALSE;
568         return token;
569     }
570     for(i = 0; i < 2; i++) {
571         if(regnum == This->oD_regnum[i]) {
572             token |= (D3DSPR_ATTROUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
573             token |= i & D3DSP_REGNUM_MASK; /* No shift */
574             return token;
575         }
576     }
577     for(i = 0; i < 8; i++) {
578         if(regnum == This->oT_regnum[i]) {
579             token |= (D3DSPR_TEXCRDOUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
580             token |= i & D3DSP_REGNUM_MASK; /* No shift */
581             return token;
582         }
583     }
584
585     /* The varying must be undeclared - if an unsupported varying was declared,
586      * the vs_find_builtin_varyings function would have caught it and this code
587      * would not run */
588     WARN("Undeclared varying %u\n", regnum);
589     This->state = E_INVALIDARG;
590     return -1;
591 }
592
593 static void vs_12_dstreg(struct bc_writer *This, const struct shader_reg *reg,
594                          struct bytecode_buffer *buffer,
595                          DWORD shift, DWORD mod) {
596     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
597     DWORD has_wmask;
598
599     if(reg->rel_reg) {
600         WARN("Relative addressing not supported for destination registers\n");
601         This->state = E_INVALIDARG;
602         return;
603     }
604
605     switch(reg->type) {
606         case BWRITERSPR_OUTPUT:
607             token |= map_vs_output(This, reg->regnum, reg->writemask, &has_wmask);
608             break;
609
610         case BWRITERSPR_RASTOUT:
611         case BWRITERSPR_ATTROUT:
612             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
613             * but are unexpected. If we hit this path it might be due to an error.
614             */
615             FIXME("Unexpected register type %u\n", reg->type);
616             /* drop through */
617         case BWRITERSPR_INPUT:
618         case BWRITERSPR_TEMP:
619         case BWRITERSPR_CONST:
620             token |= (reg->type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
621             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
622             has_wmask = TRUE;
623             break;
624
625         case BWRITERSPR_ADDR:
626             if(reg->regnum != 0) {
627                 WARN("Only a0 exists\n");
628                 This->state = E_INVALIDARG;
629                 return;
630             }
631             token |= (D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
632             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
633             has_wmask = TRUE;
634             break;
635
636         case BWRITERSPR_PREDICATE:
637             if(This->version != BWRITERVS_VERSION(2, 1)){
638                 WARN("Predicate register is allowed only in vs_2_x\n");
639                 This->state = E_INVALIDARG;
640                 return;
641             }
642             if(reg->regnum != 0) {
643                 WARN("Only predicate register p0 exists\n");
644                 This->state = E_INVALIDARG;
645                 return;
646             }
647             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
648             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
649             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
650             has_wmask = TRUE;
651             break;
652
653         default:
654             WARN("Invalid register type for 1.x-2.x vertex shader\n");
655             This->state = E_INVALIDARG;
656             return;
657     }
658
659     /* strictly speaking there are no modifiers in vs_2_0 and vs_1_x, but they can be written
660      * into the bytecode and since the compiler doesn't do such checks write them
661      * (the checks are done by the undocumented shader validator)
662      */
663     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
664     token |= d3d9_dstmod(mod);
665
666     if(has_wmask) {
667         token |= d3d9_writemask(reg->writemask);
668     }
669     put_dword(buffer, token);
670 }
671
672 static void write_srcregs(struct bc_writer *This, const struct instruction *instr,
673                           struct bytecode_buffer *buffer){
674     unsigned int i;
675     if(instr->has_predicate){
676         This->funcs->srcreg(This, &instr->predicate, buffer);
677     }
678     for(i = 0; i < instr->num_srcs; i++){
679         This->funcs->srcreg(This, &instr->src[i], buffer);
680     }
681 }
682
683 static DWORD map_ps_input(struct bc_writer *This,
684                           const struct shader_reg *reg) {
685     DWORD i, token = 0;
686     /* Map color interpolators */
687     for(i = 0; i < 2; i++) {
688         if(reg->regnum == This->v_regnum[i]) {
689             token |= (D3DSPR_INPUT << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
690             token |= i & D3DSP_REGNUM_MASK; /* No shift */
691             return token;
692         }
693     }
694     for(i = 0; i < 8; i++) {
695         if(reg->regnum == This->t_regnum[i]) {
696             token |= (D3DSPR_TEXTURE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
697             token |= i & D3DSP_REGNUM_MASK; /* No shift */
698             return token;
699         }
700     }
701
702     WARN("Invalid ps 1/2 varying\n");
703     This->state = E_INVALIDARG;
704     return token;
705 }
706
707 /* The length of an instruction consists of the destination register (if any),
708  * the number of source registers, the number of address registers used for
709  * indirect addressing, and optionally the predicate register
710  */
711 static DWORD instrlen(const struct instruction *instr, unsigned int srcs, unsigned int dsts) {
712     unsigned int i;
713     DWORD ret = srcs + dsts + (instr->has_predicate ? 1 : 0);
714
715     if(dsts){
716         if(instr->dst.rel_reg) ret++;
717     }
718     for(i = 0; i < srcs; i++) {
719         if(instr->src[i].rel_reg) ret++;
720     }
721     return ret;
722 }
723
724 static void instr_handler(struct bc_writer *This,
725                           const struct instruction *instr,
726                           struct bytecode_buffer *buffer) {
727     DWORD token = d3d9_opcode(instr->opcode);
728     TRACE("token: %x\n", token);
729
730     This->funcs->opcode(This, instr, token, buffer);
731     if(instr->has_dst) This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
732     write_srcregs(This, instr, buffer);
733 }
734
735 static void write_constB(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
736     write_const(shader->constB, shader->num_cb, D3DSIO_DEFB, D3DSPR_CONSTBOOL, buffer, len);
737 }
738
739 static void write_constI(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
740     write_const(shader->constI, shader->num_ci, D3DSIO_DEFI, D3DSPR_CONSTINT, buffer, len);
741 }
742
743 static void vs_2_header(struct bc_writer *This,
744                         const struct bwriter_shader *shader,
745                         struct bytecode_buffer *buffer) {
746     HRESULT hr;
747
748     hr = vs_find_builtin_varyings(This, shader);
749     if(FAILED(hr)) {
750         This->state = hr;
751         return;
752     }
753
754     /* Declare the shader type and version */
755     put_dword(buffer, This->version);
756
757     write_declarations(buffer, TRUE, shader->inputs, shader->num_inputs, D3DSPR_INPUT);
758     write_constF(shader, buffer, TRUE);
759     write_constB(shader, buffer, TRUE);
760     write_constI(shader, buffer, TRUE);
761     return;
762 }
763
764 static void vs_2_srcreg(struct bc_writer *This,
765                         const struct shader_reg *reg,
766                         struct bytecode_buffer *buffer) {
767     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
768     DWORD has_swizzle;
769     DWORD component;
770     DWORD d3d9reg;
771
772     switch(reg->type) {
773         case BWRITERSPR_OUTPUT:
774             /* Map the swizzle to a writemask, the format expected
775                by map_vs_output
776              */
777             switch(reg->swizzle) {
778                 case BWRITERVS_SWIZZLE_X:
779                     component = BWRITERSP_WRITEMASK_0;
780                     break;
781                 case BWRITERVS_SWIZZLE_Y:
782                     component = BWRITERSP_WRITEMASK_1;
783                     break;
784                 case BWRITERVS_SWIZZLE_Z:
785                     component = BWRITERSP_WRITEMASK_2;
786                     break;
787                 case BWRITERVS_SWIZZLE_W:
788                     component = BWRITERSP_WRITEMASK_3;
789                     break;
790                 default:
791                     component = 0;
792             }
793             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
794             break;
795
796         case BWRITERSPR_RASTOUT:
797         case BWRITERSPR_ATTROUT:
798             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
799              * but are unexpected. If we hit this path it might be due to an error.
800              */
801             FIXME("Unexpected register type %u\n", reg->type);
802             /* drop through */
803         case BWRITERSPR_INPUT:
804         case BWRITERSPR_TEMP:
805         case BWRITERSPR_CONST:
806         case BWRITERSPR_ADDR:
807         case BWRITERSPR_CONSTINT:
808         case BWRITERSPR_CONSTBOOL:
809         case BWRITERSPR_LABEL:
810             d3d9reg = d3d9_register(reg->type);
811             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
812             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
813             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
814             break;
815
816         case BWRITERSPR_LOOP:
817             if(reg->regnum != 0) {
818                 WARN("Only regnum 0 is supported for the loop index register in vs_2_0\n");
819                 This->state = E_INVALIDARG;
820                 return;
821             }
822             token |= (D3DSPR_LOOP << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
823             token |= (D3DSPR_LOOP << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
824             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
825             break;
826
827         case BWRITERSPR_PREDICATE:
828             if(This->version != BWRITERVS_VERSION(2, 1)){
829                 WARN("Predicate register is allowed only in vs_2_x\n");
830                 This->state = E_INVALIDARG;
831                 return;
832             }
833             if(reg->regnum > 0) {
834                 WARN("Only predicate register 0 is supported\n");
835                 This->state = E_INVALIDARG;
836                 return;
837             }
838             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
839             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
840             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
841
842             break;
843
844         default:
845             WARN("Invalid register type for 2.0 vshader\n");
846             This->state = E_INVALIDARG;
847             return;
848     }
849
850     token |= d3d9_swizzle(reg->swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
851
852     token |= d3d9_srcmod(reg->srcmod);
853
854     if(reg->rel_reg)
855         token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
856
857     put_dword(buffer, token);
858
859     /* vs_2_0 and newer write the register containing the index explicitly in the
860      * binary code
861      */
862     if(token & D3DVS_ADDRMODE_RELATIVE)
863         vs_2_srcreg(This, reg->rel_reg, buffer);
864 }
865
866 static void sm_2_opcode(struct bc_writer *This,
867                         const struct instruction *instr,
868                         DWORD token, struct bytecode_buffer *buffer) {
869     /* From sm 2 onwards instruction length is encoded in the opcode field */
870     int dsts = instr->has_dst ? 1 : 0;
871     token |= instrlen(instr, instr->num_srcs, dsts) << D3DSI_INSTLENGTH_SHIFT;
872     if(instr->comptype)
873         token |= (d3d9_comparetype(instr->comptype) << 16) & (0xf << 16);
874     if(instr->has_predicate)
875         token |= D3DSHADER_INSTRUCTION_PREDICATED;
876     put_dword(buffer,token);
877 }
878
879 static const struct instr_handler_table vs_2_0_handlers[] = {
880     {BWRITERSIO_ADD,            instr_handler},
881     {BWRITERSIO_NOP,            instr_handler},
882     {BWRITERSIO_MOV,            instr_handler},
883     {BWRITERSIO_SUB,            instr_handler},
884     {BWRITERSIO_MAD,            instr_handler},
885     {BWRITERSIO_MUL,            instr_handler},
886     {BWRITERSIO_RCP,            instr_handler},
887     {BWRITERSIO_RSQ,            instr_handler},
888     {BWRITERSIO_DP3,            instr_handler},
889     {BWRITERSIO_DP4,            instr_handler},
890     {BWRITERSIO_MIN,            instr_handler},
891     {BWRITERSIO_MAX,            instr_handler},
892     {BWRITERSIO_SLT,            instr_handler},
893     {BWRITERSIO_SGE,            instr_handler},
894     {BWRITERSIO_ABS,            instr_handler},
895     {BWRITERSIO_EXP,            instr_handler},
896     {BWRITERSIO_LOG,            instr_handler},
897     {BWRITERSIO_EXPP,           instr_handler},
898     {BWRITERSIO_LOGP,           instr_handler},
899     {BWRITERSIO_DST,            instr_handler},
900     {BWRITERSIO_LRP,            instr_handler},
901     {BWRITERSIO_FRC,            instr_handler},
902     {BWRITERSIO_CRS,            instr_handler},
903     {BWRITERSIO_SGN,            instr_handler},
904     {BWRITERSIO_NRM,            instr_handler},
905     {BWRITERSIO_SINCOS,         instr_handler},
906     {BWRITERSIO_M4x4,           instr_handler},
907     {BWRITERSIO_M4x3,           instr_handler},
908     {BWRITERSIO_M3x4,           instr_handler},
909     {BWRITERSIO_M3x3,           instr_handler},
910     {BWRITERSIO_M3x2,           instr_handler},
911     {BWRITERSIO_LIT,            instr_handler},
912     {BWRITERSIO_POW,            instr_handler},
913     {BWRITERSIO_MOVA,           instr_handler},
914
915     {BWRITERSIO_CALL,           instr_handler},
916     {BWRITERSIO_CALLNZ,         instr_handler},
917     {BWRITERSIO_REP,            instr_handler},
918     {BWRITERSIO_ENDREP,         instr_handler},
919     {BWRITERSIO_IF,             instr_handler},
920     {BWRITERSIO_LABEL,          instr_handler},
921     {BWRITERSIO_ELSE,           instr_handler},
922     {BWRITERSIO_ENDIF,          instr_handler},
923     {BWRITERSIO_LOOP,           instr_handler},
924     {BWRITERSIO_RET,            instr_handler},
925     {BWRITERSIO_ENDLOOP,        instr_handler},
926
927     {BWRITERSIO_END,            NULL},
928 };
929
930 static const struct bytecode_backend vs_2_0_backend = {
931     vs_2_header,
932     end,
933     vs_2_srcreg,
934     vs_12_dstreg,
935     sm_2_opcode,
936     vs_2_0_handlers
937 };
938
939 static const struct instr_handler_table vs_2_x_handlers[] = {
940     {BWRITERSIO_ADD,            instr_handler},
941     {BWRITERSIO_NOP,            instr_handler},
942     {BWRITERSIO_MOV,            instr_handler},
943     {BWRITERSIO_SUB,            instr_handler},
944     {BWRITERSIO_MAD,            instr_handler},
945     {BWRITERSIO_MUL,            instr_handler},
946     {BWRITERSIO_RCP,            instr_handler},
947     {BWRITERSIO_RSQ,            instr_handler},
948     {BWRITERSIO_DP3,            instr_handler},
949     {BWRITERSIO_DP4,            instr_handler},
950     {BWRITERSIO_MIN,            instr_handler},
951     {BWRITERSIO_MAX,            instr_handler},
952     {BWRITERSIO_SLT,            instr_handler},
953     {BWRITERSIO_SGE,            instr_handler},
954     {BWRITERSIO_ABS,            instr_handler},
955     {BWRITERSIO_EXP,            instr_handler},
956     {BWRITERSIO_LOG,            instr_handler},
957     {BWRITERSIO_EXPP,           instr_handler},
958     {BWRITERSIO_LOGP,           instr_handler},
959     {BWRITERSIO_DST,            instr_handler},
960     {BWRITERSIO_LRP,            instr_handler},
961     {BWRITERSIO_FRC,            instr_handler},
962     {BWRITERSIO_CRS,            instr_handler},
963     {BWRITERSIO_SGN,            instr_handler},
964     {BWRITERSIO_NRM,            instr_handler},
965     {BWRITERSIO_SINCOS,         instr_handler},
966     {BWRITERSIO_M4x4,           instr_handler},
967     {BWRITERSIO_M4x3,           instr_handler},
968     {BWRITERSIO_M3x4,           instr_handler},
969     {BWRITERSIO_M3x3,           instr_handler},
970     {BWRITERSIO_M3x2,           instr_handler},
971     {BWRITERSIO_LIT,            instr_handler},
972     {BWRITERSIO_POW,            instr_handler},
973     {BWRITERSIO_MOVA,           instr_handler},
974
975     {BWRITERSIO_CALL,           instr_handler},
976     {BWRITERSIO_CALLNZ,         instr_handler},
977     {BWRITERSIO_REP,            instr_handler},
978     {BWRITERSIO_ENDREP,         instr_handler},
979     {BWRITERSIO_IF,             instr_handler},
980     {BWRITERSIO_LABEL,          instr_handler},
981     {BWRITERSIO_IFC,            instr_handler},
982     {BWRITERSIO_ELSE,           instr_handler},
983     {BWRITERSIO_ENDIF,          instr_handler},
984     {BWRITERSIO_BREAK,          instr_handler},
985     {BWRITERSIO_BREAKC,         instr_handler},
986     {BWRITERSIO_LOOP,           instr_handler},
987     {BWRITERSIO_RET,            instr_handler},
988     {BWRITERSIO_ENDLOOP,        instr_handler},
989
990     {BWRITERSIO_SETP,           instr_handler},
991     {BWRITERSIO_BREAKP,         instr_handler},
992
993     {BWRITERSIO_END,            NULL},
994 };
995
996 static const struct bytecode_backend vs_2_x_backend = {
997     vs_2_header,
998     end,
999     vs_2_srcreg,
1000     vs_12_dstreg,
1001     sm_2_opcode,
1002     vs_2_x_handlers
1003 };
1004
1005 static void write_samplers(const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1006     DWORD i;
1007     DWORD instr_dcl = D3DSIO_DCL | (2 << D3DSI_INSTLENGTH_SHIFT);
1008     DWORD token;
1009     const DWORD reg = (1<<31) |
1010                       ((D3DSPR_SAMPLER << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
1011                       ((D3DSPR_SAMPLER << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
1012                       D3DSP_WRITEMASK_ALL;
1013
1014     for(i = 0; i < shader->num_samplers; i++) {
1015         /* Write the DCL instruction */
1016         put_dword(buffer, instr_dcl);
1017         token = (1<<31);
1018         /* Already shifted */
1019         token |= (d3d9_sampler(shader->samplers[i].type)) & D3DSP_TEXTURETYPE_MASK;
1020         put_dword(buffer, token);
1021         put_dword(buffer, reg | (shader->samplers[i].regnum & D3DSP_REGNUM_MASK));
1022     }
1023 }
1024
1025 static void ps_2_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1026     HRESULT hr = find_ps_builtin_semantics(This, shader, 8);
1027     if(FAILED(hr)) {
1028         This->state = hr;
1029         return;
1030     }
1031
1032     /* Declare the shader type and version */
1033     put_dword(buffer, This->version);
1034     write_samplers(shader, buffer);
1035     write_constF(shader, buffer, TRUE);
1036     write_constB(shader, buffer, TRUE);
1037     write_constI(shader, buffer, TRUE);
1038 }
1039
1040 static void ps_2_srcreg(struct bc_writer *This,
1041                         const struct shader_reg *reg,
1042                         struct bytecode_buffer *buffer) {
1043     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1044     DWORD d3d9reg;
1045     if(reg->rel_reg) {
1046         WARN("Relative addressing not supported in <= ps_3_0\n");
1047         This->state = E_INVALIDARG;
1048         return;
1049     }
1050
1051     switch(reg->type) {
1052         case BWRITERSPR_INPUT:
1053             token |= map_ps_input(This, reg);
1054             break;
1055
1056             /* Can be mapped 1:1 */
1057         case BWRITERSPR_TEMP:
1058         case BWRITERSPR_CONST:
1059         case BWRITERSPR_COLOROUT:
1060         case BWRITERSPR_CONSTBOOL:
1061         case BWRITERSPR_CONSTINT:
1062         case BWRITERSPR_SAMPLER:
1063         case BWRITERSPR_LABEL:
1064         case BWRITERSPR_DEPTHOUT:
1065             d3d9reg = d3d9_register(reg->type);
1066             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1067             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1068             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1069             break;
1070
1071         case BWRITERSPR_PREDICATE:
1072             if(This->version != BWRITERPS_VERSION(2, 1)){
1073                 WARN("Predicate register not supported in ps_2_0\n");
1074                 This->state = E_INVALIDARG;
1075             }
1076             if(reg->regnum) {
1077                 WARN("Predicate register with regnum %u not supported\n",
1078                      reg->regnum);
1079                 This->state = E_INVALIDARG;
1080             }
1081             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1082             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1083             token |= 0 & D3DSP_REGNUM_MASK; /* No shift */
1084             break;
1085
1086         default:
1087             WARN("Invalid register type for ps_2_0 shader\n");
1088             This->state = E_INVALIDARG;
1089             return;
1090     }
1091
1092     token |= d3d9_swizzle(reg->swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1093
1094     token |= d3d9_srcmod(reg->srcmod);
1095     put_dword(buffer, token);
1096 }
1097
1098 static void ps_2_0_dstreg(struct bc_writer *This,
1099                           const struct shader_reg *reg,
1100                           struct bytecode_buffer *buffer,
1101                           DWORD shift, DWORD mod) {
1102     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1103     DWORD d3d9reg;
1104
1105     if(reg->rel_reg) {
1106         WARN("Relative addressing not supported for destination registers\n");
1107         This->state = E_INVALIDARG;
1108         return;
1109     }
1110
1111     switch(reg->type) {
1112         case BWRITERSPR_TEMP: /* 1:1 mapping */
1113         case BWRITERSPR_COLOROUT:
1114         case BWRITERSPR_DEPTHOUT:
1115             d3d9reg = d3d9_register(reg->type);
1116             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1117             token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1118             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1119             break;
1120
1121         case BWRITERSPR_PREDICATE:
1122             if(This->version != BWRITERPS_VERSION(2, 1)){
1123                 WARN("Predicate register not supported in ps_2_0\n");
1124                 This->state = E_INVALIDARG;
1125             }
1126             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1127             token |= (D3DSPR_PREDICATE << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1128             token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1129             break;
1130
1131         /* texkill uses the input register as a destination parameter */
1132         case BWRITERSPR_INPUT:
1133             token |= map_ps_input(This, reg);
1134             break;
1135
1136         default:
1137             WARN("Invalid dest register type for 2.x pshader\n");
1138             This->state = E_INVALIDARG;
1139             return;
1140     }
1141
1142     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1143     token |= d3d9_dstmod(mod);
1144
1145     token |= d3d9_writemask(reg->writemask);
1146     put_dword(buffer, token);
1147 }
1148
1149 static const struct instr_handler_table ps_2_0_handlers[] = {
1150     {BWRITERSIO_ADD,            instr_handler},
1151     {BWRITERSIO_NOP,            instr_handler},
1152     {BWRITERSIO_MOV,            instr_handler},
1153     {BWRITERSIO_SUB,            instr_handler},
1154     {BWRITERSIO_MAD,            instr_handler},
1155     {BWRITERSIO_MUL,            instr_handler},
1156     {BWRITERSIO_RCP,            instr_handler},
1157     {BWRITERSIO_RSQ,            instr_handler},
1158     {BWRITERSIO_DP3,            instr_handler},
1159     {BWRITERSIO_DP4,            instr_handler},
1160     {BWRITERSIO_MIN,            instr_handler},
1161     {BWRITERSIO_MAX,            instr_handler},
1162     {BWRITERSIO_ABS,            instr_handler},
1163     {BWRITERSIO_EXP,            instr_handler},
1164     {BWRITERSIO_LOG,            instr_handler},
1165     {BWRITERSIO_EXPP,           instr_handler},
1166     {BWRITERSIO_LOGP,           instr_handler},
1167     {BWRITERSIO_LRP,            instr_handler},
1168     {BWRITERSIO_FRC,            instr_handler},
1169     {BWRITERSIO_CRS,            instr_handler},
1170     {BWRITERSIO_NRM,            instr_handler},
1171     {BWRITERSIO_SINCOS,         instr_handler},
1172     {BWRITERSIO_M4x4,           instr_handler},
1173     {BWRITERSIO_M4x3,           instr_handler},
1174     {BWRITERSIO_M3x4,           instr_handler},
1175     {BWRITERSIO_M3x3,           instr_handler},
1176     {BWRITERSIO_M3x2,           instr_handler},
1177     {BWRITERSIO_POW,            instr_handler},
1178     {BWRITERSIO_DP2ADD,         instr_handler},
1179     {BWRITERSIO_CMP,            instr_handler},
1180
1181     {BWRITERSIO_TEX,            instr_handler},
1182     {BWRITERSIO_TEXLDP,         instr_handler},
1183     {BWRITERSIO_TEXLDB,         instr_handler},
1184     {BWRITERSIO_TEXKILL,        instr_handler},
1185
1186     {BWRITERSIO_END,            NULL},
1187 };
1188
1189 static const struct bytecode_backend ps_2_0_backend = {
1190     ps_2_header,
1191     end,
1192     ps_2_srcreg,
1193     ps_2_0_dstreg,
1194     sm_2_opcode,
1195     ps_2_0_handlers
1196 };
1197
1198 static const struct instr_handler_table ps_2_x_handlers[] = {
1199     {BWRITERSIO_ADD,            instr_handler},
1200     {BWRITERSIO_NOP,            instr_handler},
1201     {BWRITERSIO_MOV,            instr_handler},
1202     {BWRITERSIO_SUB,            instr_handler},
1203     {BWRITERSIO_MAD,            instr_handler},
1204     {BWRITERSIO_MUL,            instr_handler},
1205     {BWRITERSIO_RCP,            instr_handler},
1206     {BWRITERSIO_RSQ,            instr_handler},
1207     {BWRITERSIO_DP3,            instr_handler},
1208     {BWRITERSIO_DP4,            instr_handler},
1209     {BWRITERSIO_MIN,            instr_handler},
1210     {BWRITERSIO_MAX,            instr_handler},
1211     {BWRITERSIO_ABS,            instr_handler},
1212     {BWRITERSIO_EXP,            instr_handler},
1213     {BWRITERSIO_LOG,            instr_handler},
1214     {BWRITERSIO_EXPP,           instr_handler},
1215     {BWRITERSIO_LOGP,           instr_handler},
1216     {BWRITERSIO_LRP,            instr_handler},
1217     {BWRITERSIO_FRC,            instr_handler},
1218     {BWRITERSIO_CRS,            instr_handler},
1219     {BWRITERSIO_NRM,            instr_handler},
1220     {BWRITERSIO_SINCOS,         instr_handler},
1221     {BWRITERSIO_M4x4,           instr_handler},
1222     {BWRITERSIO_M4x3,           instr_handler},
1223     {BWRITERSIO_M3x4,           instr_handler},
1224     {BWRITERSIO_M3x3,           instr_handler},
1225     {BWRITERSIO_M3x2,           instr_handler},
1226     {BWRITERSIO_POW,            instr_handler},
1227     {BWRITERSIO_DP2ADD,         instr_handler},
1228     {BWRITERSIO_CMP,            instr_handler},
1229
1230     {BWRITERSIO_CALL,           instr_handler},
1231     {BWRITERSIO_CALLNZ,         instr_handler},
1232     {BWRITERSIO_REP,            instr_handler},
1233     {BWRITERSIO_ENDREP,         instr_handler},
1234     {BWRITERSIO_IF,             instr_handler},
1235     {BWRITERSIO_LABEL,          instr_handler},
1236     {BWRITERSIO_IFC,            instr_handler},
1237     {BWRITERSIO_ELSE,           instr_handler},
1238     {BWRITERSIO_ENDIF,          instr_handler},
1239     {BWRITERSIO_BREAK,          instr_handler},
1240     {BWRITERSIO_BREAKC,         instr_handler},
1241     {BWRITERSIO_RET,            instr_handler},
1242
1243     {BWRITERSIO_TEX,            instr_handler},
1244     {BWRITERSIO_TEXLDP,         instr_handler},
1245     {BWRITERSIO_TEXLDB,         instr_handler},
1246     {BWRITERSIO_TEXKILL,        instr_handler},
1247     {BWRITERSIO_DSX,            instr_handler},
1248     {BWRITERSIO_DSY,            instr_handler},
1249
1250     {BWRITERSIO_SETP,           instr_handler},
1251     {BWRITERSIO_BREAKP,         instr_handler},
1252
1253     {BWRITERSIO_TEXLDD,         instr_handler},
1254
1255     {BWRITERSIO_END,            NULL},
1256 };
1257
1258 static const struct bytecode_backend ps_2_x_backend = {
1259     ps_2_header,
1260     end,
1261     ps_2_srcreg,
1262     ps_2_0_dstreg,
1263     sm_2_opcode,
1264     ps_2_x_handlers
1265 };
1266
1267 static void sm_3_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1268     /* Declare the shader type and version */
1269     put_dword(buffer, This->version);
1270
1271     write_declarations(buffer, TRUE, shader->inputs, shader->num_inputs, D3DSPR_INPUT);
1272     write_declarations(buffer, TRUE, shader->outputs, shader->num_outputs, D3DSPR_OUTPUT);
1273     write_constF(shader, buffer, TRUE);
1274     write_constB(shader, buffer, TRUE);
1275     write_constI(shader, buffer, TRUE);
1276     write_samplers(shader, buffer);
1277     return;
1278 }
1279
1280 static void sm_3_srcreg(struct bc_writer *This,
1281                         const struct shader_reg *reg,
1282                         struct bytecode_buffer *buffer) {
1283     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1284     DWORD d3d9reg;
1285
1286     d3d9reg = d3d9_register(reg->type);
1287     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1288     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1289     token |= reg->regnum & D3DSP_REGNUM_MASK;
1290
1291     token |= d3d9_swizzle(reg->swizzle) & D3DVS_SWIZZLE_MASK;
1292     token |= d3d9_srcmod(reg->srcmod);
1293
1294     if(reg->rel_reg) {
1295         if(reg->type == BWRITERSPR_CONST && This->version == BWRITERPS_VERSION(3, 0)) {
1296             WARN("c%u[...] is unsupported in ps_3_0\n", reg->regnum);
1297             This->state = E_INVALIDARG;
1298             return;
1299         }
1300         if(((reg->rel_reg->type == BWRITERSPR_ADDR && This->version == BWRITERVS_VERSION(3, 0)) ||
1301            reg->rel_reg->type == BWRITERSPR_LOOP) &&
1302            reg->rel_reg->regnum == 0) {
1303             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1304         } else {
1305             WARN("Unsupported relative addressing register\n");
1306             This->state = E_INVALIDARG;
1307             return;
1308         }
1309     }
1310
1311     put_dword(buffer, token);
1312
1313     /* vs_2_0 and newer write the register containing the index explicitly in the
1314      * binary code
1315      */
1316     if(token & D3DVS_ADDRMODE_RELATIVE) {
1317         sm_3_srcreg(This, reg->rel_reg, buffer);
1318     }
1319 }
1320
1321 static void sm_3_dstreg(struct bc_writer *This,
1322                         const struct shader_reg *reg,
1323                         struct bytecode_buffer *buffer,
1324                         DWORD shift, DWORD mod) {
1325     DWORD token = (1 << 31); /* Bit 31 of registers is 1 */
1326     DWORD d3d9reg;
1327
1328     if(reg->rel_reg) {
1329         if(This->version == BWRITERVS_VERSION(3, 0) &&
1330            reg->type == BWRITERSPR_OUTPUT) {
1331             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1332         } else {
1333             WARN("Relative addressing not supported for this shader type or register type\n");
1334             This->state = E_INVALIDARG;
1335             return;
1336         }
1337     }
1338
1339     d3d9reg = d3d9_register(reg->type);
1340     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
1341     token |= (d3d9reg << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;
1342     token |= reg->regnum & D3DSP_REGNUM_MASK; /* No shift */
1343
1344     token |= d3d9_dstmod(mod);
1345
1346     token |= d3d9_writemask(reg->writemask);
1347     put_dword(buffer, token);
1348
1349     /* vs_2_0 and newer write the register containing the index explicitly in the
1350      * binary code
1351      */
1352     if(token & D3DVS_ADDRMODE_RELATIVE) {
1353         sm_3_srcreg(This, reg->rel_reg, buffer);
1354     }
1355 }
1356
1357 static const struct instr_handler_table vs_3_handlers[] = {
1358     {BWRITERSIO_ADD,            instr_handler},
1359     {BWRITERSIO_NOP,            instr_handler},
1360     {BWRITERSIO_MOV,            instr_handler},
1361     {BWRITERSIO_SUB,            instr_handler},
1362     {BWRITERSIO_MAD,            instr_handler},
1363     {BWRITERSIO_MUL,            instr_handler},
1364     {BWRITERSIO_RCP,            instr_handler},
1365     {BWRITERSIO_RSQ,            instr_handler},
1366     {BWRITERSIO_DP3,            instr_handler},
1367     {BWRITERSIO_DP4,            instr_handler},
1368     {BWRITERSIO_MIN,            instr_handler},
1369     {BWRITERSIO_MAX,            instr_handler},
1370     {BWRITERSIO_SLT,            instr_handler},
1371     {BWRITERSIO_SGE,            instr_handler},
1372     {BWRITERSIO_ABS,            instr_handler},
1373     {BWRITERSIO_EXP,            instr_handler},
1374     {BWRITERSIO_LOG,            instr_handler},
1375     {BWRITERSIO_EXPP,           instr_handler},
1376     {BWRITERSIO_LOGP,           instr_handler},
1377     {BWRITERSIO_DST,            instr_handler},
1378     {BWRITERSIO_LRP,            instr_handler},
1379     {BWRITERSIO_FRC,            instr_handler},
1380     {BWRITERSIO_CRS,            instr_handler},
1381     {BWRITERSIO_SGN,            instr_handler},
1382     {BWRITERSIO_NRM,            instr_handler},
1383     {BWRITERSIO_SINCOS,         instr_handler},
1384     {BWRITERSIO_M4x4,           instr_handler},
1385     {BWRITERSIO_M4x3,           instr_handler},
1386     {BWRITERSIO_M3x4,           instr_handler},
1387     {BWRITERSIO_M3x3,           instr_handler},
1388     {BWRITERSIO_M3x2,           instr_handler},
1389     {BWRITERSIO_LIT,            instr_handler},
1390     {BWRITERSIO_POW,            instr_handler},
1391     {BWRITERSIO_MOVA,           instr_handler},
1392
1393     {BWRITERSIO_CALL,           instr_handler},
1394     {BWRITERSIO_CALLNZ,         instr_handler},
1395     {BWRITERSIO_REP,            instr_handler},
1396     {BWRITERSIO_ENDREP,         instr_handler},
1397     {BWRITERSIO_IF,             instr_handler},
1398     {BWRITERSIO_LABEL,          instr_handler},
1399     {BWRITERSIO_IFC,            instr_handler},
1400     {BWRITERSIO_ELSE,           instr_handler},
1401     {BWRITERSIO_ENDIF,          instr_handler},
1402     {BWRITERSIO_BREAK,          instr_handler},
1403     {BWRITERSIO_BREAKC,         instr_handler},
1404     {BWRITERSIO_LOOP,           instr_handler},
1405     {BWRITERSIO_RET,            instr_handler},
1406     {BWRITERSIO_ENDLOOP,        instr_handler},
1407
1408     {BWRITERSIO_SETP,           instr_handler},
1409     {BWRITERSIO_BREAKP,         instr_handler},
1410     {BWRITERSIO_TEXLDL,         instr_handler},
1411
1412     {BWRITERSIO_END,            NULL},
1413 };
1414
1415 static const struct bytecode_backend vs_3_backend = {
1416     sm_3_header,
1417     end,
1418     sm_3_srcreg,
1419     sm_3_dstreg,
1420     sm_2_opcode,
1421     vs_3_handlers
1422 };
1423
1424 static const struct instr_handler_table ps_3_handlers[] = {
1425     {BWRITERSIO_ADD,            instr_handler},
1426     {BWRITERSIO_NOP,            instr_handler},
1427     {BWRITERSIO_MOV,            instr_handler},
1428     {BWRITERSIO_SUB,            instr_handler},
1429     {BWRITERSIO_MAD,            instr_handler},
1430     {BWRITERSIO_MUL,            instr_handler},
1431     {BWRITERSIO_RCP,            instr_handler},
1432     {BWRITERSIO_RSQ,            instr_handler},
1433     {BWRITERSIO_DP3,            instr_handler},
1434     {BWRITERSIO_DP4,            instr_handler},
1435     {BWRITERSIO_MIN,            instr_handler},
1436     {BWRITERSIO_MAX,            instr_handler},
1437     {BWRITERSIO_ABS,            instr_handler},
1438     {BWRITERSIO_EXP,            instr_handler},
1439     {BWRITERSIO_LOG,            instr_handler},
1440     {BWRITERSIO_EXPP,           instr_handler},
1441     {BWRITERSIO_LOGP,           instr_handler},
1442     {BWRITERSIO_LRP,            instr_handler},
1443     {BWRITERSIO_FRC,            instr_handler},
1444     {BWRITERSIO_CRS,            instr_handler},
1445     {BWRITERSIO_NRM,            instr_handler},
1446     {BWRITERSIO_SINCOS,         instr_handler},
1447     {BWRITERSIO_M4x4,           instr_handler},
1448     {BWRITERSIO_M4x3,           instr_handler},
1449     {BWRITERSIO_M3x4,           instr_handler},
1450     {BWRITERSIO_M3x3,           instr_handler},
1451     {BWRITERSIO_M3x2,           instr_handler},
1452     {BWRITERSIO_POW,            instr_handler},
1453     {BWRITERSIO_DP2ADD,         instr_handler},
1454     {BWRITERSIO_CMP,            instr_handler},
1455
1456     {BWRITERSIO_CALL,           instr_handler},
1457     {BWRITERSIO_CALLNZ,         instr_handler},
1458     {BWRITERSIO_REP,            instr_handler},
1459     {BWRITERSIO_ENDREP,         instr_handler},
1460     {BWRITERSIO_IF,             instr_handler},
1461     {BWRITERSIO_LABEL,          instr_handler},
1462     {BWRITERSIO_IFC,            instr_handler},
1463     {BWRITERSIO_ELSE,           instr_handler},
1464     {BWRITERSIO_ENDIF,          instr_handler},
1465     {BWRITERSIO_BREAK,          instr_handler},
1466     {BWRITERSIO_BREAKC,         instr_handler},
1467     {BWRITERSIO_LOOP,           instr_handler},
1468     {BWRITERSIO_RET,            instr_handler},
1469     {BWRITERSIO_ENDLOOP,        instr_handler},
1470
1471     {BWRITERSIO_SETP,           instr_handler},
1472     {BWRITERSIO_BREAKP,         instr_handler},
1473     {BWRITERSIO_TEXLDL,         instr_handler},
1474
1475     {BWRITERSIO_TEX,            instr_handler},
1476     {BWRITERSIO_TEXLDP,         instr_handler},
1477     {BWRITERSIO_TEXLDB,         instr_handler},
1478     {BWRITERSIO_TEXKILL,        instr_handler},
1479     {BWRITERSIO_DSX,            instr_handler},
1480     {BWRITERSIO_DSY,            instr_handler},
1481     {BWRITERSIO_TEXLDD,         instr_handler},
1482
1483     {BWRITERSIO_END,            NULL},
1484 };
1485
1486 static const struct bytecode_backend ps_3_backend = {
1487     sm_3_header,
1488     end,
1489     sm_3_srcreg,
1490     sm_3_dstreg,
1491     sm_2_opcode,
1492     ps_3_handlers
1493 };
1494
1495 static void init_vs20_dx9_writer(struct bc_writer *writer) {
1496     TRACE("Creating DirectX9 vertex shader 2.0 writer\n");
1497     writer->funcs = &vs_2_0_backend;
1498 }
1499
1500 static void init_vs2x_dx9_writer(struct bc_writer *writer) {
1501     TRACE("Creating DirectX9 vertex shader 2.x writer\n");
1502     writer->funcs = &vs_2_x_backend;
1503 }
1504
1505 static void init_vs30_dx9_writer(struct bc_writer *writer) {
1506     TRACE("Creating DirectX9 vertex shader 3.0 writer\n");
1507     writer->funcs = &vs_3_backend;
1508 }
1509
1510 static void init_ps20_dx9_writer(struct bc_writer *writer) {
1511     TRACE("Creating DirectX9 pixel shader 2.0 writer\n");
1512     writer->funcs = &ps_2_0_backend;
1513 }
1514
1515 static void init_ps2x_dx9_writer(struct bc_writer *writer) {
1516     TRACE("Creating DirectX9 pixel shader 2.x writer\n");
1517     writer->funcs = &ps_2_x_backend;
1518 }
1519
1520 static void init_ps30_dx9_writer(struct bc_writer *writer) {
1521     TRACE("Creating DirectX9 pixel shader 3.0 writer\n");
1522     writer->funcs = &ps_3_backend;
1523 }
1524
1525 static struct bc_writer *create_writer(DWORD version, DWORD dxversion) {
1526     struct bc_writer *ret = asm_alloc(sizeof(*ret));
1527
1528     if(!ret) {
1529         WARN("Failed to allocate a bytecode writer instance\n");
1530         return NULL;
1531     }
1532
1533     switch(version) {
1534         case BWRITERVS_VERSION(1, 0):
1535             if(dxversion != 9) {
1536                 WARN("Unsupported dxversion for vertex shader 1.0 requested: %u\n", dxversion);
1537                 goto fail;
1538             }
1539             /* TODO: Set the appropriate writer backend */
1540             break;
1541         case BWRITERVS_VERSION(1, 1):
1542             if(dxversion != 9) {
1543                 WARN("Unsupported dxversion for vertex shader 1.1 requested: %u\n", dxversion);
1544                 goto fail;
1545             }
1546             /* TODO: Set the appropriate writer backend */
1547             break;
1548         case BWRITERVS_VERSION(2, 0):
1549             if(dxversion != 9) {
1550                 WARN("Unsupported dxversion for vertex shader 2.0 requested: %u\n", dxversion);
1551                 goto fail;
1552             }
1553             init_vs20_dx9_writer(ret);
1554             break;
1555         case BWRITERVS_VERSION(2, 1):
1556             if(dxversion != 9) {
1557                 WARN("Unsupported dxversion for vertex shader 2.x requested: %u\n", dxversion);
1558                 goto fail;
1559             }
1560             init_vs2x_dx9_writer(ret);
1561             break;
1562         case BWRITERVS_VERSION(3, 0):
1563             if(dxversion != 9) {
1564                 WARN("Unsupported dxversion for vertex shader 3.0 requested: %u\n", dxversion);
1565                 goto fail;
1566             }
1567             init_vs30_dx9_writer(ret);
1568             break;
1569
1570         case BWRITERPS_VERSION(1, 0):
1571             if(dxversion != 9) {
1572                 WARN("Unsupported dxversion for pixel shader 1.0 requested: %u\n", dxversion);
1573                 goto fail;
1574             }
1575             /* TODO: Set the appropriate writer backend */
1576             break;
1577         case BWRITERPS_VERSION(1, 1):
1578             if(dxversion != 9) {
1579                 WARN("Unsupported dxversion for pixel shader 1.1 requested: %u\n", dxversion);
1580                 goto fail;
1581             }
1582             /* TODO: Set the appropriate writer backend */
1583             break;
1584         case BWRITERPS_VERSION(1, 2):
1585             if(dxversion != 9) {
1586                 WARN("Unsupported dxversion for pixel shader 1.2 requested: %u\n", dxversion);
1587                 goto fail;
1588             }
1589             /* TODO: Set the appropriate writer backend */
1590             break;
1591         case BWRITERPS_VERSION(1, 3):
1592             if(dxversion != 9) {
1593                 WARN("Unsupported dxversion for pixel shader 1.3 requested: %u\n", dxversion);
1594                 goto fail;
1595             }
1596             /* TODO: Set the appropriate writer backend */
1597             break;
1598         case BWRITERPS_VERSION(1, 4):
1599             if(dxversion != 9) {
1600                 WARN("Unsupported dxversion for pixel shader 1.4 requested: %u\n", dxversion);
1601                 goto fail;
1602             }
1603             /* TODO: Set the appropriate writer backend */
1604             break;
1605
1606         case BWRITERPS_VERSION(2, 0):
1607             if(dxversion != 9) {
1608                 WARN("Unsupported dxversion for pixel shader 2.0 requested: %u\n", dxversion);
1609                 goto fail;
1610             }
1611             init_ps20_dx9_writer(ret);
1612             break;
1613
1614         case BWRITERPS_VERSION(2, 1):
1615             if(dxversion != 9) {
1616                 WARN("Unsupported dxversion for pixel shader 2.x requested: %u\n", dxversion);
1617                 goto fail;
1618             }
1619             init_ps2x_dx9_writer(ret);
1620             break;
1621
1622         case BWRITERPS_VERSION(3, 0):
1623             if(dxversion != 9) {
1624                 WARN("Unsupported dxversion for pixel shader 3.0 requested: %u\n", dxversion);
1625                 goto fail;
1626             }
1627             init_ps30_dx9_writer(ret);
1628             break;
1629
1630         default:
1631             WARN("Unexpected shader version requested: %08x\n", version);
1632             goto fail;
1633     }
1634     ret->version = version;
1635     return ret;
1636
1637 fail:
1638     asm_free(ret);
1639     return NULL;
1640 }
1641
1642 static HRESULT call_instr_handler(struct bc_writer *writer,
1643                                   const struct instruction *instr,
1644                                   struct bytecode_buffer *buffer) {
1645     DWORD i=0;
1646
1647     while(writer->funcs->instructions[i].opcode != BWRITERSIO_END) {
1648         if(instr->opcode == writer->funcs->instructions[i].opcode) {
1649             if(!writer->funcs->instructions[i].func) {
1650                 WARN("Opcode %u not supported by this profile\n", instr->opcode);
1651                 return E_INVALIDARG;
1652             }
1653             writer->funcs->instructions[i].func(writer, instr, buffer);
1654             return S_OK;
1655         }
1656         i++;
1657     }
1658
1659     FIXME("Unhandled instruction %u\n", instr->opcode);
1660     return E_INVALIDARG;
1661 }
1662
1663 /* SlWriteBytecode (wineshader.@)
1664  *
1665  * Writes shader version specific bytecode from the shader passed in.
1666  * The returned bytecode can be passed to the Direct3D runtime like
1667  * IDirect3DDevice9::Create*Shader.
1668  *
1669  * Parameters:
1670  *  shader: Shader to translate into bytecode
1671  *  version: Shader version to generate(d3d version token)
1672  *  dxversion: DirectX version the code targets
1673  *  result: the resulting shader bytecode
1674  *
1675  * Return values:
1676  *  S_OK on success
1677  */
1678 DWORD SlWriteBytecode(const struct bwriter_shader *shader, int dxversion, DWORD **result) {
1679     struct bc_writer *writer;
1680     struct bytecode_buffer *buffer = NULL;
1681     HRESULT hr;
1682     unsigned int i;
1683
1684     if(!shader){
1685         ERR("NULL shader structure, aborting\n");
1686         return E_FAIL;
1687     }
1688     writer = create_writer(shader->version, dxversion);
1689     *result = NULL;
1690
1691     if(!writer) {
1692         WARN("Could not create a bytecode writer instance. Either unsupported version\n");
1693         WARN("or out of memory\n");
1694         hr = E_FAIL;
1695         goto error;
1696     }
1697
1698     buffer = allocate_buffer();
1699     if(!buffer) {
1700         WARN("Failed to allocate a buffer for the shader bytecode\n");
1701         hr = E_FAIL;
1702         goto error;
1703     }
1704
1705     writer->funcs->header(writer, shader, buffer);
1706     if(FAILED(writer->state)) {
1707         hr = writer->state;
1708         goto error;
1709     }
1710
1711     for(i = 0; i < shader->num_instrs; i++) {
1712         hr = call_instr_handler(writer, shader->instr[i], buffer);
1713         if(FAILED(hr)) {
1714             goto error;
1715         }
1716     }
1717
1718     if(FAILED(writer->state)) {
1719         hr = writer->state;
1720         goto error;
1721     }
1722
1723     writer->funcs->end(writer, shader, buffer);
1724
1725     if(FAILED(buffer->state)) {
1726         hr = buffer->state;
1727         goto error;
1728     }
1729
1730     /* Cut off unneeded memory from the result buffer */
1731     *result = asm_realloc(buffer->data,
1732                          sizeof(DWORD) * buffer->size);
1733     if(!*result) {
1734         *result = buffer->data;
1735     }
1736     buffer->data = NULL;
1737     hr = S_OK;
1738
1739 error:
1740     if(buffer) {
1741         asm_free(buffer->data);
1742         asm_free(buffer);
1743     }
1744     asm_free(writer);
1745     return hr;
1746 }
1747
1748 void SlDeleteShader(struct bwriter_shader *shader) {
1749     unsigned int i, j;
1750
1751     TRACE("Deleting shader %p\n", shader);
1752
1753     for(i = 0; i < shader->num_cf; i++) {
1754         asm_free(shader->constF[i]);
1755     }
1756     asm_free(shader->constF);
1757     for(i = 0; i < shader->num_ci; i++) {
1758         asm_free(shader->constI[i]);
1759     }
1760     asm_free(shader->constI);
1761     for(i = 0; i < shader->num_cb; i++) {
1762         asm_free(shader->constB[i]);
1763     }
1764     asm_free(shader->constB);
1765
1766     asm_free(shader->inputs);
1767     asm_free(shader->outputs);
1768     asm_free(shader->samplers);
1769
1770     for(i = 0; i < shader->num_instrs; i++) {
1771         for(j = 0; j < shader->instr[i]->num_srcs; j++) {
1772             asm_free(shader->instr[i]->src[j].rel_reg);
1773         }
1774         asm_free(shader->instr[i]->src);
1775         asm_free(shader->instr[i]);
1776     }
1777     asm_free(shader->instr);
1778
1779     asm_free(shader);
1780 }