2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2005 Oliver Stieber
7 * Copyright 2006 Ivan Gyurdiev
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27 #include "wined3d_private.h"
29 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
/* Sentinel pointer value (the integer 1 cast to const char *). It is not used
 * in the code visible here. NOTE(review): presumably stored in an opcode
 * table's GL-name slot to mark opcodes that need GLSL - confirm at the table. */
31 #define GLNAME_REQUIRE_GLSL ((const char *)1)
/* Expands to the dereferenced local `gl_info`; print_glsl_info_log() below
 * takes a gl_info parameter. NOTE(review): presumably consumed by GL_EXTCALL -
 * confirm in wined3d_private.h. */
32 #define GLINFO_LOCATION (*gl_info)
/* Register-usage summary filled in by shader_get_registers_used(). The member
 * list is not visible here; this file clears and ORs bits into the members
 * `temporary`, `texcoord` and `address`, using (1 << register number). */
34 typedef struct shader_reg_maps {
/* Returns non-zero when `token` is accepted as a shader version token by
 * either shader_is_pshader_version() or shader_is_vshader_version(). */
40 inline static BOOL shader_is_version_token(DWORD token) {
41 return shader_is_pshader_version(token) ||
42 shader_is_vshader_version(token);
/* Appends printf-style formatted text at the current end of the shader program
 * buffer.
 * NOTE(review): the line carrying the function name is not visible in this
 * view; callers below use shader_addline(buffer, format, ...), so this is
 * presumably that function - confirm.
 *
 * buffer: destination; text is written at buffer->buffer + buffer->bsize
 * format: printf-style format string, followed by its arguments */
46 SHADER_BUFFER* buffer,
47 const char *format, ...) {
/* Write position: first byte past the text already stored */
49 char* base = buffer->buffer + buffer->bsize;
53 va_start(args, format);
/* Output is limited to the space left below SHADER_PGMSIZE - 1 */
54 rc = vsnprintf(base, SHADER_PGMSIZE - 1 - buffer->bsize, format, args);
/* Overflow is detected two ways: a negative result, or a result larger than
 * the space that was offered.
 * NOTE(review): a C99 vsnprintf also truncates when rc equals the size
 * argument; that case passes this `>` test - confirm whether `>=` is wanted. */
57 if (rc < 0 || /* C89 */
58 rc > SHADER_PGMSIZE - 1 - buffer->bsize) { /* C99 */
60 ERR("The buffer allocated for the shader program string "
61 "is too small at %d bytes.\n", SHADER_PGMSIZE);
/* On overflow the recorded size is pinned to SHADER_PGMSIZE - 1 */
62 buffer->bsize = SHADER_PGMSIZE - 1;
/* Log the text that was just formatted, together with the line counter and
 * the buffer size */
68 TRACE("GL HW (%u, %u) : %s", buffer->lineNo, buffer->bsize, base);
/* Looks up the opcode table entry that describes an instruction token.
 *
 * iface: shader object; supplies the opcode table (baseShader.shader_ins) and
 *        the shader version used for filtering
 * code:  instruction token; only the bits in D3DSI_OPCODE_MASK are compared
 *
 * The loop below examines table entries until one with a NULL name is
 * reached. An entry matches when its opcode equals the masked code and either
 * hex_version lies within [min_version, max_version] or both bounds are 0.
 *
 * Returns a pointer to the matching entry. When nothing matches a FIXME is
 * logged; the statement that follows it is not visible in this view - callers
 * in this file compare the result against NULL. */
72 const SHADER_OPCODE* shader_get_opcode(
73 IWineD3DBaseShader *iface, const DWORD code) {
75 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl*) iface;
/* `version` is only used in the failure message; `hex_version` drives the
 * min/max comparison */
78 DWORD version = This->baseShader.version;
79 DWORD hex_version = This->baseShader.hex_version;
80 const SHADER_OPCODE *shader_ins = This->baseShader.shader_ins;
82 /** TODO: use dichotomic search */
83 while (NULL != shader_ins[i].name) {
/* Opcode must match, and the entry must either cover this shader version
 * or carry no version bounds at all (both 0) */
84 if (((code & D3DSI_OPCODE_MASK) == shader_ins[i].opcode) &&
85 (((hex_version >= shader_ins[i].min_version) && (hex_version <= shader_ins[i].max_version)) ||
86 ((shader_ins[i].min_version == 0) && (shader_ins[i].max_version == 0)))) {
87 return &shader_ins[i];
/* No entry matched: report the raw token, its masked opcode and the version */
91 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n",
92 code, code, code & D3DSI_OPCODE_MASK, version);
/* NOTE(review): the line carrying the function name and some parameter lines
 * are not visible in this view; callers below invoke
 * shader_get_param(iface, pToken, <param out>, <addr_token out>), so this is
 * presumably that function - confirm.
 *
 * iface:      shader whose version decides whether an address token follows
 * addr_token: receives the token after *pToken when a relative-addressing
 *             token is present, otherwise 0
 * The store of the parameter token itself is not visible here. */
96 /* Read a parameter opcode from the input stream,
97 * and possibly a relative addressing token.
98 * Return the number of tokens read */
100 IWineD3DBaseShader* iface,
105 /* PS >= 3.0 have relative addressing (with token)
106 * VS >= 2.0 have relative addressing (with token)
107 * VS >= 1.0 < 2.0 have relative addressing (without token)
108 * The version check below should work in general */
110 IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
/* rel_token is set only when the shader's major version is >= 2 and the
 * parameter token has the relative address mode selected */
111 char rel_token = D3DSHADER_VERSION_MAJOR(This->baseShader.hex_version) >= 2 &&
112 ((*pToken & D3DSHADER_ADDRESSMODE_MASK) == D3DSHADER_ADDRMODE_RELATIVE);
/* The address token, when present, is the word right after the parameter */
115 *addr_token = rel_token? *(pToken + 1): 0;
/* Two tokens consumed when an address token followed, otherwise one */
116 return rel_token? 2:1;
119 /* Return the number of parameters to skip for an opcode */
/* This:         shader; only baseShader.hex_version is read
 * curOpcode:    table entry for the instruction; its num_params is the
 *               fallback count
 * opcode_token: raw instruction token; its length field is used when the
 *               shader's major version is >= 2 */
120 static inline int shader_skip_opcode(
121 IWineD3DBaseShaderImpl* This,
122 const SHADER_OPCODE* curOpcode,
123 DWORD opcode_token) {
125 /* Shaders >= 2.0 may contain address tokens, but fortunately they
126 * have a useful length mask - use it here. Shaders 1.0 contain no such tokens */
/* Major version >= 2: length field extracted from the instruction token;
 * otherwise: the parameter count recorded in the opcode table */
128 return (D3DSHADER_VERSION_MAJOR(This->baseShader.hex_version) >= 2)?
129 ((opcode_token & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT):
130 curOpcode->num_params;
133 /* Read the parameters of an unrecognized opcode from the input stream
134 * Return the number of tokens read.
136 * Note: This function assumes source or destination token format.
137 * It will not work with specially-formatted tokens like DEF or DCL,
138 * but hopefully those would be recognized */
/* iface:  shader, forwarded to shader_get_param() and shader_dump_param()
 * pToken: first token after the unrecognized instruction token
 * The declarations of tokens_read and i, and the return statement, are not
 * visible in this view. */
140 int shader_skip_unrecognized(
141 IWineD3DBaseShader* iface,
142 const DWORD* pToken) {
147 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
/* A token with the top bit set is treated as one more parameter */
148 while (*pToken & 0x80000000) {
150 DWORD param, addr_token;
/* NOTE(review): `¶m` looks like a mis-encoded `&param` (param is the DWORD
 * declared just above) - confirm and restore.
 * NOTE(review): tokens_read is a running total, yet pToken is advanced by that
 * total instead of by the count returned for this parameter; from the second
 * parameter on this looks like it skips too far - confirm. */
151 tokens_read += shader_get_param(iface, pToken, ¶m, &addr_token);
152 pToken += tokens_read;
/* Report the raw tokens, then their decoded form */
154 FIXME("Unrecognized opcode param: token=%08lX "
155 "addr_token=%08lX name=", param, addr_token);
156 shader_dump_param(iface, param, addr_token, i);
163 /* Note that this does not count the loop register
164 * as an address register. */
/* First-pass scan of a shader token stream that records which registers are
 * referenced.
 *
 * iface:    shader; supplies the opcode table and version
 * reg_maps: output; temporary, texcoord and address are reset to 0 and then
 *           get bit (1 << register number) set for each register seen
 * pToken:   start of the token stream; it is dereferenced without a NULL
 *           check and scanned until the D3DVS_END() token */
166 static void shader_get_registers_used(
167 IWineD3DBaseShader *iface,
168 shader_reg_maps* reg_maps,
169 CONST DWORD* pToken) {
171 IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
/* Start from empty masks */
176 reg_maps->temporary = 0;
177 reg_maps->texcoord = 0;
178 reg_maps->address = 0;
180 while (D3DVS_END() != *pToken) {
181 CONST SHADER_OPCODE* curOpcode;
/* Version tokens and comment blocks carry no register references */
185 if (shader_is_version_token(*pToken)) {
190 } else if (shader_is_comment(*pToken)) {
/* The comment length is encoded in the comment token itself */
191 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
193 pToken += comment_len;
/* Consume the instruction token and find its table entry */
198 opcode_token = *pToken++;
199 curOpcode = shader_get_opcode(iface, opcode_token);
201 /* Unhandled opcode, and its parameters */
202 if (NULL == curOpcode) {
203 while (*pToken & 0x80000000)
207 /* Skip declarations (for now) */
208 } else if (D3DSIO_DCL == curOpcode->opcode) {
209 pToken += curOpcode->num_params;
212 /* Skip definitions (for now) */
213 } else if (D3DSIO_DEF == curOpcode->opcode) {
214 pToken += curOpcode->num_params;
217 /* Set texture registers, and temporary registers */
221 /* This will loop over all the registers and try to
222 * make a bitmask of the ones we're interested in.
224 * Relative addressing tokens are ignored, but that's
225 * okay, since we'll catch any address registers when
226 * they are initialized (required by spec) */
/* A predicated instruction carries one parameter more than the table says */
228 limit = (opcode_token & D3DSHADER_INSTRUCTION_PREDICATED)?
229 curOpcode->num_params + 1: curOpcode->num_params;
231 for (i = 0; i < limit; ++i) {
233 DWORD param, addr_token, reg, regtype;
/* NOTE(review): `¶m` looks like a mis-encoded `&param` - confirm and
 * restore. */
234 pToken += shader_get_param(iface, pToken, ¶m, &addr_token);
/* Split the parameter token into register type and register number */
236 regtype = (param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT;
237 reg = param & D3DSP_REGNUM_MASK;
/* NOTE(review): the (1 << reg) shifts below have no visible range check on
 * reg; a value at or above the width of int would be undefined behaviour -
 * confirm register numbers are bounded. */
239 if (D3DSPR_TEXTURE == regtype) { /* vs: D3DSPR_ADDR */
/* The same register type value means "texture" in a pixel shader; the
 * address assignment below is presumably the else branch for vertex
 * shaders (the else line is not visible) */
241 if (shader_is_pshader_version(This->baseShader.hex_version))
242 reg_maps->texcoord |= (1 << reg);
244 reg_maps->address |= (1 << reg);
247 if (D3DSPR_TEMP == regtype)
248 reg_maps->temporary |= (1 << reg);
/* Traces a textual name for the usage token of a declaration.
 *
 * The parameter list is not visible in this view; the body reads `decl` (the
 * usage token) and `param` (the register token whose type picks the branch).
 * For a sampler register the texture type is printed; otherwise the usage
 * name is printed, for some usages followed by the usage index. */
254 void shader_program_dump_decl_usage(
258 DWORD regtype = shader_get_regtype(param);
/* Sampler declarations encode a texture type instead of a usage */
261 if (regtype == D3DSPR_SAMPLER) {
262 DWORD ttype = decl & D3DSP_TEXTURETYPE_MASK;
265 case D3DSTT_2D: TRACE("2d"); break;
266 case D3DSTT_CUBE: TRACE("cube"); break;
267 case D3DSTT_VOLUME: TRACE("volume"); break;
268 default: TRACE("unknown_ttype(%08lx)", ttype);
/* Non-sampler declarations: usage plus usage index */
273 DWORD usage = decl & D3DSP_DCL_USAGE_MASK;
274 DWORD idx = (decl & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
277 case D3DDECLUSAGE_POSITION:
278 TRACE("%s%ld", "position", idx);
280 case D3DDECLUSAGE_BLENDINDICES:
281 TRACE("%s", "blend");
283 case D3DDECLUSAGE_BLENDWEIGHT:
284 TRACE("%s", "weight");
286 case D3DDECLUSAGE_NORMAL:
287 TRACE("%s%ld", "normal", idx);
289 case D3DDECLUSAGE_PSIZE:
290 TRACE("%s", "psize");
/* COLOR prints either "color" or "specular<idx - 1>"; the test choosing
 * between the two is not visible in this view */
292 case D3DDECLUSAGE_COLOR:
294 TRACE("%s", "color");
296 TRACE("%s%ld", "specular", (idx - 1));
299 case D3DDECLUSAGE_TEXCOORD:
300 TRACE("%s%ld", "texture", idx);
302 case D3DDECLUSAGE_TANGENT:
303 TRACE("%s", "tangent");
305 case D3DDECLUSAGE_BINORMAL:
306 TRACE("%s", "binormal");
308 case D3DDECLUSAGE_TESSFACTOR:
309 TRACE("%s", "tessfactor");
311 case D3DDECLUSAGE_POSITIONT:
312 TRACE("%s%ld", "positionT", idx);
314 case D3DDECLUSAGE_FOG:
317 case D3DDECLUSAGE_DEPTH:
318 TRACE("%s", "depth");
320 case D3DDECLUSAGE_SAMPLE:
321 TRACE("%s", "sample");
/* Any other usage value is reported as unknown */
324 FIXME("unknown_semantics(%08lx)", usage);
/* Helper for shader_dump_param(): handles a register reference that may carry
 * a relative-addressing token.
 *
 * iface:      shader, forwarded to shader_dump_param()
 * addr_token: relative-addressing token; it is itself dumped as a parameter
 * Several lines (remaining parameters, the trace output) are not visible in
 * this view. */
329 static void shader_dump_arr_entry(
330 IWineD3DBaseShader *iface,
332 const DWORD addr_token,
/* Register number taken from the parameter token */
335 DWORD reg = param & D3DSP_REGNUM_MASK;
/* Tail of an expression testing for relative address mode; its beginning is
 * not visible in this view */
337 ((param & D3DSHADER_ADDRESSMODE_MASK) == D3DSHADER_ADDRMODE_RELATIVE);
/* The address token is printed through the regular parameter dumper, with
 * no further address token of its own (0) */
342 shader_dump_param(iface, addr_token, 0, input);
/* Traces a human-readable form of one register parameter token: a register
 * name chosen by register type, then either a write mask or source modifier
 * suffix and swizzle.
 *
 * iface:      shader; its version selects pixel- or vertex-shader naming
 * addr_token: relative-addressing token, forwarded to shader_dump_arr_entry()
 * `param` (the token being dumped) and `input` are read by the body but their
 * declarations are not visible in this view.
 * NOTE(review): `input` presumably selects source-operand formatting (swizzle)
 * over destination formatting (write mask); the tests are not visible -
 * confirm. */
350 void shader_dump_param(
351 IWineD3DBaseShader *iface,
353 const DWORD addr_token,
356 IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
357 static const char* rastout_reg_names[] = { "oPos", "oFog", "oPts" };
358 char swizzle_reg_chars[4];
360 DWORD reg = param & D3DSP_REGNUM_MASK;
361 DWORD regtype = shader_get_regtype(param);
363 /* There are some minor differences between pixel and vertex shaders */
364 BOOL pshader = shader_is_pshader_version(This->baseShader.hex_version);
366 /* For one, we'd prefer color components to be shown for pshaders.
367 * FIXME: use the swizzle function for this */
/* Component letters: rgba for pixel shaders, xyzw otherwise */
369 swizzle_reg_chars[0] = pshader? 'r': 'x';
370 swizzle_reg_chars[1] = pshader? 'g': 'y';
371 swizzle_reg_chars[2] = pshader? 'b': 'z';
372 swizzle_reg_chars[3] = pshader? 'a': 'w';
/* Source modifier tests: the four negating variants share one branch, COMP
 * and NOT have their own; the output of each branch is not visible here */
375 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
376 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
377 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
378 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
380 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
382 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NOT)
395 shader_dump_arr_entry(iface, param, addr_token, input);
397 case D3DSPR_TEXTURE: /* vs: case D3DSPR_ADDR */
398 TRACE("%c%lu", (pshader? 't':'a'), reg);
/* NOTE(review): rastout_reg_names has three entries and is indexed by reg
 * with no visible bounds check - confirm reg cannot exceed 2 here. */
401 TRACE("%s", rastout_reg_names[reg]);
403 case D3DSPR_COLOROUT:
406 case D3DSPR_DEPTHOUT:
412 case D3DSPR_TEXCRDOUT:
414 /* Vertex shaders >= 3.0 use general purpose output registers
415 * (D3DSPR_OUTPUT), which can include an address token */
417 if (D3DSHADER_VERSION_MAJOR(This->baseShader.hex_version) >= 3) {
419 shader_dump_arr_entry(iface, param, addr_token, input);
424 case D3DSPR_CONSTINT:
426 shader_dump_arr_entry(iface, param, addr_token, input);
428 case D3DSPR_CONSTBOOL:
430 shader_dump_arr_entry(iface, param, addr_token, input);
441 case D3DSPR_PREDICATE:
445 TRACE("unhandled_rtype(%#lx)", regtype);
450 /* operand output (for modifiers and shift, see dump_ins_modifiers) */
/* A write mask is printed only when it is not the full mask; each enabled
 * component contributes its letter */
452 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
454 if (param & D3DSP_WRITEMASK_0) TRACE("%c", swizzle_reg_chars[0]);
455 if (param & D3DSP_WRITEMASK_1) TRACE("%c", swizzle_reg_chars[1]);
456 if (param & D3DSP_WRITEMASK_2) TRACE("%c", swizzle_reg_chars[2]);
457 if (param & D3DSP_WRITEMASK_3) TRACE("%c", swizzle_reg_chars[3]);
/* Swizzle: four 2-bit selectors, one per output component */
462 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
463 DWORD swizzle_r = swizzle & 0x03;
464 DWORD swizzle_g = (swizzle >> 2) & 0x03;
465 DWORD swizzle_b = (swizzle >> 4) & 0x03;
466 DWORD swizzle_a = (swizzle >> 6) & 0x03;
/* Modifier suffix: each negated variant prints the same suffix as its
 * non-negated counterpart */
468 if (0 != (param & D3DSP_SRCMOD_MASK)) {
469 DWORD mask = param & D3DSP_SRCMOD_MASK;
471 case D3DSPSM_NONE: break;
472 case D3DSPSM_NEG: break;
473 case D3DSPSM_NOT: break;
474 case D3DSPSM_BIAS: TRACE("_bias"); break;
475 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
476 case D3DSPSM_SIGN: TRACE("_bx2"); break;
477 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
478 case D3DSPSM_COMP: break;
479 case D3DSPSM_X2: TRACE("_x2"); break;
480 case D3DSPSM_X2NEG: TRACE("_x2"); break;
481 case D3DSPSM_DZ: TRACE("_dz"); break;
482 case D3DSPSM_DW: TRACE("_dw"); break;
484 TRACE("_unknown_modifier(%#lx)", mask >> D3DSP_SRCMOD_SHIFT);
489 * swizzle bits fields:
492 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
/* All four selectors equal: print the single replicated component */
493 if (swizzle_r == swizzle_g &&
494 swizzle_r == swizzle_b &&
495 swizzle_r == swizzle_a) {
496 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
499 swizzle_reg_chars[swizzle_r],
500 swizzle_reg_chars[swizzle_g],
501 swizzle_reg_chars[swizzle_b],
502 swizzle_reg_chars[swizzle_a]);
508 /** Generate the variable & register declarations for the ARB_vertex_program
/* Appends the register declarations for the ARB program text to `buffer`.
 *
 * iface:    shader; baseShader.limits bounds every loop below
 * reg_maps: usage bitmasks; a register is declared only when its bit is set
 * buffer:   shader text buffer written through shader_addline()
 *
 * NOTE(review): the declaration of `i` is not visible, and the format
 * specifiers used for it differ between loops (%lu and %ld) - confirm the type
 * and make them agree. */
510 void generate_arb_declarations(
511 IWineD3DBaseShader *iface,
512 shader_reg_maps* reg_maps,
513 SHADER_BUFFER* buffer) {
515 IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
/* One TEMP per temporary register that the shader references */
518 for(i = 0; i < This->baseShader.limits.temporary; i++) {
519 if (reg_maps->temporary & (1 << i))
520 shader_addline(buffer, "TEMP R%lu;\n", i);
/* One ADDRESS per referenced address register */
523 for (i = 0; i < This->baseShader.limits.address; i++) {
524 if (reg_maps->address & (1 << i))
525 shader_addline(buffer, "ADDRESS A%ld;\n", i);
/* Texture coordinate registers are declared as TEMPs named T<n> */
528 for(i = 0; i < This->baseShader.limits.texture; i++) {
529 if (reg_maps->texcoord & (1 << i))
530 shader_addline(buffer,"TEMP T%lu;\n", i);
533 /* Texture coordinate registers must be pre-loaded */
/* shader_get_registers_used() sets texcoord bits only for pixel shaders, so
 * these fragment.texcoord loads are emitted for pixel shaders only */
534 for (i = 0; i < This->baseShader.limits.texture; i++) {
535 if (reg_maps->texcoord & (1 << i))
536 shader_addline(buffer, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
539 /* Need to PARAM the environment parameters (constants) so we can use relative addressing */
540 shader_addline(buffer, "PARAM C[%d] = { program.env[0..%d] };\n",
541 This->baseShader.limits.constant_float,
542 This->baseShader.limits.constant_float - 1);
545 /** Generate the variable & register declarations for the GLSL
/* Appends the GLSL declarations and the opening of main() to `buffer`.
 *
 * iface:    shader; baseShader.limits bounds every loop below
 * reg_maps: usage bitmasks; address and temporary variables are declared only
 *           when their bit is set, attributes are declared unconditionally
 * buffer:   shader text buffer written through shader_addline()
 *
 * Logs a FIXME on every call because GLSL support is incomplete.
 * NOTE(review): the declaration of `i` is not visible, and it is printed with
 * %ld, %i and %lu in different loops - confirm the type and make them agree. */
547 void generate_glsl_declarations(
548 IWineD3DBaseShader *iface,
549 shader_reg_maps* reg_maps,
550 SHADER_BUFFER* buffer) {
552 IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
555 FIXME("GLSL not fully implemented yet.\n");
557 /* Declare the constants (aka uniforms) */
558 shader_addline(buffer, "uniform vec4 C[%u];\n", This->baseShader.limits.constant_float);
560 /* Declare address variables */
561 for (i = 0; i < This->baseShader.limits.address; i++) {
562 if (reg_maps->address & (1 << i))
563 shader_addline(buffer, "ivec4 A%ld;\n", i);
566 /* Declare all named attributes (TODO: Add this to the reg_maps
567 * and only declare those that are needed) */
568 for (i = 0; i < This->baseShader.limits.attributes; i++) {
569 shader_addline(buffer, "attribute vec4 attrib%i;\n", i);
572 /* Declare temporary variables */
573 for(i = 0; i < This->baseShader.limits.temporary; i++) {
574 if (reg_maps->temporary & (1 << i))
575 shader_addline(buffer, "vec4 R%lu;\n", i);
578 /* Start the main program */
/* Only the opening brace of main() is emitted by this function */
579 shader_addline(buffer, "void main() {\n");
582 /** Shared code in order to generate the bulk of the shader string.
583 Use the shader_header_fct & shader_footer_fct to add strings
584 that are specific to pixel or vertex functions
585 NOTE: A description of how to parse tokens can be found at:
586 http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/* Two-pass translation of a shader token stream into program text.
 *
 * iface:     shader being translated
 * buffer:    output text buffer, filled via the declaration generators and the
 *            per-opcode handler functions
 * pFunction: start of the shader token stream
 *
 * Pass 1 collects register usage (shader_get_registers_used) and emits
 * declarations for the target selected by wined3d_settings.shader_mode.
 * Pass 2 walks the instructions, gathers each instruction's destination,
 * optional predicate and source tokens into a SHADER_OPCODE_ARG, and hands it
 * to the opcode's handler for the selected target.
 *
 * NOTE(review): pToken is tested against NULL only before pass 2, but pass 1
 * has already received it and dereferences it unconditionally - confirm
 * whether pFunction may be NULL. */
587 void generate_base_shader(
588 IWineD3DBaseShader *iface,
589 SHADER_BUFFER* buffer,
590 CONST DWORD* pFunction) {
592 IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
593 const DWORD *pToken = pFunction;
594 const SHADER_OPCODE *curOpcode = NULL;
595 SHADER_HANDLER hw_fct = NULL;
598 shader_reg_maps reg_maps;
600 /* Initialize current parsing state */
601 This->baseShader.parse_state.current_row = 0;
603 /* First pass: figure out which temporary and texture registers are used */
/* NOTE(review): `®_maps` here and below looks like a mis-encoded `&reg_maps`
 * (reg_maps is the local declared above) - confirm and restore. */
604 shader_get_registers_used(iface, ®_maps, pToken);
606 /* TODO: check register usage against GL/Directx limits, and fail if they're exceeded
607 nUseAddressRegister <= GL_MAX_PROGRAM_ADDRESS_REGISTERS_AR
608 nUseTempRegister <= GL_MAX_PROGRAM_LOCAL_PARAMETERS_ARB
611 /* Pre-declare registers */
612 if (wined3d_settings.shader_mode == SHADER_GLSL) {
613 generate_glsl_declarations(iface, ®_maps, buffer);
615 generate_arb_declarations(iface, ®_maps, buffer);
618 /* Second pass, process opcodes */
619 if (NULL != pToken) {
620 while (D3DPS_END() != *pToken) {
622 /* Skip version token */
623 if (shader_is_version_token(*pToken)) {
628 /* Skip comment tokens */
629 if (shader_is_comment(*pToken)) {
630 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
632 TRACE("#%s\n", (char*)pToken);
633 pToken += comment_len;
/* Consume the instruction token and pick the handler for the active target */
638 opcode_token = *pToken++;
639 curOpcode = shader_get_opcode(iface, opcode_token);
/* NOTE(review): curOpcode is dereferenced here, before the NULL test that
 * follows; an unknown opcode would fault on this line instead of reaching
 * the "Unrecognized opcode" branch - confirm and reorder. */
640 hw_fct = (wined3d_settings.shader_mode ==
641 SHADER_GLSL ? curOpcode->hw_glsl_fct : curOpcode->hw_fct);
643 /* Unknown opcode and its parameters */
644 if (NULL == curOpcode) {
645 FIXME("Unrecognized opcode: token=%08lX\n", opcode_token);
646 pToken += shader_skip_unrecognized(iface, pToken);
648 /* If a generator function is set for current shader target, use it */
649 } else if (hw_fct != NULL) {
651 SHADER_OPCODE_ARG hw_arg;
653 hw_arg.shader = iface;
654 hw_arg.opcode = curOpcode;
655 hw_arg.buffer = buffer;
657 if (curOpcode->num_params > 0) {
659 DWORD param, addr_token = 0;
661 /* DCL instruction has usage dst parameter, not register */
662 if (curOpcode->opcode == D3DSIO_DCL)
/* NOTE(review): `¶m` here and below looks like a mis-encoded `&param` -
 * confirm and restore. */
665 pToken += shader_get_param(iface, pToken, ¶m, &addr_token);
668 hw_arg.dst_addr = addr_token;
/* A predicated instruction carries its predicate token right after the
 * destination */
670 if (opcode_token & D3DSHADER_INSTRUCTION_PREDICATED)
671 hw_arg.predicate = *pToken++;
/* Parameters 1..num_params-1 are sources; they are stored at index i-1 */
673 for (i = 1; i < curOpcode->num_params; i++) {
674 /* DEF* instructions have constant src parameters, not registers */
675 if (curOpcode->opcode == D3DSIO_DEF ||
676 curOpcode->opcode == D3DSIO_DEFI ||
677 curOpcode->opcode == D3DSIO_DEFB) {
681 pToken += shader_get_param(iface, pToken, ¶m, &addr_token);
683 hw_arg.src[i-1] = param;
684 hw_arg.src_addr[i-1] = addr_token;
688 /* Call appropriate function for output target */
693 /* Unless we encounter a no-op command, this opcode is unrecognized */
694 if (curOpcode->opcode != D3DSIO_NOP) {
695 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
/* Step over the parameters of the instruction that has no handler */
696 pToken += shader_skip_opcode(This, curOpcode, opcode_token);
700 /* TODO: What about result.depth? */
705 /** Prints the GLSL info log which will contain error messages if they exist */
/* gl_info: GL information block through which GL_EXTCALL reaches the
 *          extension entry points (via GLINFO_LOCATION)
 * `obj` (the GL object whose log is queried) is used by the body; its
 * declaration is not visible in this view.
 *
 * The log is reported with FIXME only when its reported length exceeds 1. */
706 void print_glsl_info_log(
707 WineD3D_GL_Info *gl_info,
709 int infologLength = 0;
/* Ask GL how long the object's info log is */
712 GL_EXTCALL(glGetObjectParameterivARB(obj,
713 GL_OBJECT_INFO_LOG_LENGTH_ARB,
716 /* A size of 1 is just a null-terminated string, so the log should be bigger than
717 * that if there are errors. */
718 if (infologLength > 1)
/* NOTE(review): the HeapAlloc result is handed to glGetInfoLogARB with no
 * visible NULL check - confirm and guard against allocation failure. */
720 infoLog = (char *)HeapAlloc(GetProcessHeap(), 0, infologLength);
721 GL_EXTCALL(glGetInfoLogARB(obj, infologLength, NULL, infoLog));
722 FIXME("Error received from GLSL shader #%u: %s\n", obj, debugstr_a(infoLog));
/* The temporary log buffer is released as soon as it has been reported */
723 HeapFree(GetProcessHeap(), 0, infoLog);
/* Traces the suffixes encoded in a destination parameter token: the result
 * shift field and the destination modifier flags.
 *
 * output: destination parameter token of the instruction */
727 void shader_dump_ins_modifiers(const DWORD output) {
/* Extract the shift field and the modifier bits */
729 DWORD shift = (output & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
730 DWORD mmask = output & D3DSP_DSTMOD_MASK;
/* Shift values 13..15 print _d8/_d4/_d2 and 1..3 print _x2/_x4/_x8; the
 * switch header and any other case lines are not visible in this view */
734 case 13: TRACE("_d8"); break;
735 case 14: TRACE("_d4"); break;
736 case 15: TRACE("_d2"); break;
737 case 1: TRACE("_x2"); break;
738 case 2: TRACE("_x4"); break;
739 case 3: TRACE("_x8"); break;
740 default: TRACE("_unhandled_shift(%ld)", shift); break;
/* Each known modifier flag contributes its own suffix */
743 if (mmask & D3DSPDM_SATURATE) TRACE("_sat");
744 if (mmask & D3DSPDM_PARTIALPRECISION) TRACE("_pp");
745 if (mmask & D3DSPDM_MSAMPCENTROID) TRACE("_centroid");
/* Clear the flags handled above; what remains is reported as unrecognized
 * (the condition guarding the FIXME is not visible in this view) */
747 mmask &= ~(D3DSPDM_SATURATE | D3DSPDM_PARTIALPRECISION | D3DSPDM_MSAMPCENTROID);
749 FIXME("_unrecognized_modifier(%#lx)", mmask >> D3DSP_DSTMOD_SHIFT);
752 /* TODO: Move other shared code here */