2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2005 Oliver Stieber
7 * Copyright 2006 Ivan Gyurdiev
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
29 #include "wined3d_private.h"
31 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
33 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
35 /* Shader debugging - Change the following line to enable debugging of software
37 #if 0 /* Must not be 1 in cvs version */
/* Debugging variant: VSTRACE forwards its parenthesised argument list to TRACE. */
38 # define VSTRACE(A) TRACE A
/* Debugging variant: traces the x, y, z and w members of the vector, labelled with its own name. */
39 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
/* Second definition expands to nothing (presumably the non-debugging branch;
 * the directive separating the two branches is not visible here). */
42 # define TRACE_VSVECTOR(name)
46 * DirectX9 SDK download
47 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
50 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
52 * Using Vertex Shaders
53 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
56 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
59 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
60 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
62 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
65 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
68 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
70 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
71 * http://developer.nvidia.com/view.asp?IO=vstovp
73 * NVIDIA: Memory Management with VAR
74 * http://developer.nvidia.com/view.asp?IO=var_memory_management
77 /* TODO: Vertex and Pixel shaders are almost identical, the only exception being the way that some of the data is looked up or the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders.
78 Because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross compiled using a large body of shared code */
80 #define GLNAME_REQUIRE_GLSL ((const char *)1)
82 /*******************************
83 * vshader functions software VM
/* Software VM handler for the "add" opcode: d is the destination, s0 and s1 the
 * sources. Only the signature and the trace call are visible in this excerpt;
 * the statements that write d are not shown here. */
86 static void vshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
/* Trace both sources and the destination. */
91 VSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
92 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "dp3": three-component dot product of s0 and s1. */
95 static void vshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
/* Only x, y and z take part; the scalar result is written to all four components of d. */
96 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
97 VSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
98 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "dp4": four-component dot product of s0 and s1. */
101 static void vshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
/* The scalar result is written to all four components of d. */
102 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
103 VSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
104 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM handler for the "dst" opcode. Only the y assignment is visible in
 * this excerpt; the writes to d->x, d->z and d->w are not shown here. */
107 static void vshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
/* y is the product of the two source y components. */
109 d->y = s0->y * s1->y;
112 VSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
113 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM handler for the "expp" opcode; all inputs are taken from s0->w.
 * The declaration of tmp and the writes to d->z and d->w are not visible in this
 * excerpt (tmp has a float member f and an integer member d — presumably a
 * union used for bit access; confirm against the full file). */
116 static void vshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
/* Integer part of the exponent. */
122 tmp.f = floorf(s0->w);
/* x = 2 raised to the integer part. */
123 d->x = powf(2.0f, tmp.f);
/* y = fractional part of the exponent. */
124 d->y = s0->w - tmp.f;
/* Full 2^w, then clear the low 8 bits of its integer representation. */
125 tmp.f = powf(2.0f, s0->w);
126 tmp.d &= 0xFFFFFF00U;
/* NOTE(review): the trace text says "exp" although this is the expp handler. */
129 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
130 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM handler for the "lit" opcode. The writes to d->x and d->w are not
 * visible in this excerpt. */
133 static void vshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
/* y = s0->x clamped below at zero. */
135 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
/* z = s0->y raised to s0->w, but only when both s0->x and s0->y are positive; otherwise zero. */
136 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
138 VSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
139 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "logp": base-2 logarithm of |s0->w|, written to all
 * four components of d. */
142 static void vshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
/* The sign of the input is discarded before taking the logarithm. */
143 float tmp_f = fabsf(s0->w);
/* log2 computed as ln(x)/ln(2); a zero input yields -HUGE_VAL instead of calling logf(0). */
144 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
145 VSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
146 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "mad": component-wise multiply-add, d = s0 * s1 + s2. */
149 static void vshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
150 d->x = s0->x * s1->x + s2->x;
151 d->y = s0->y * s1->y + s2->y;
152 d->z = s0->z * s1->z + s2->z;
153 d->w = s0->w * s1->w + s2->w;
/* Trace all three sources and the result. */
154 VSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
155 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "max": component-wise maximum of s0 and s1.
 * When the two components compare equal, the value from s0 is taken. */
158 static void vshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
159 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
160 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
161 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
162 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
163 VSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
164 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "min": component-wise minimum of s0 and s1.
 * When the two components compare equal, the value from s1 is taken. */
167 static void vshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
168 d->x = (s0->x < s1->x) ? s0->x : s1->x;
169 d->y = (s0->y < s1->y) ? s0->y : s1->y;
170 d->z = (s0->z < s1->z) ? s0->z : s1->z;
171 d->w = (s0->w < s1->w) ? s0->w : s1->w;
172 VSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
173 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM handler for the "mov" opcode: d is the destination, s0 the source.
 * Only the signature and the trace call are visible in this excerpt; the
 * statements that write d are not shown here. */
176 static void vshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
181 VSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
182 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "mul": component-wise product, d = s0 * s1. */
185 static void vshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
186 d->x = s0->x * s1->x;
187 d->y = s0->y * s1->y;
188 d->z = s0->z * s1->z;
189 d->w = s0->w * s1->w;
190 VSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
191 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "nop": takes no operands; the only visible statement
 * is the trace call. */
194 static void vshader_nop(void) {
195 /* NOPPPP ahhh too easy ;) */
196 VSTRACE(("executing nop\n"));
/* Software VM handler for "rcp": reciprocal of s0->w, written to all four
 * components of d. */
199 static void vshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
/* A zero input yields HUGE_VAL instead of performing the division. */
200 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
201 VSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
202 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "rsq": reciprocal square root of |s0->w|, written to
 * all four components of d. */
205 static void vshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
/* The sign of the input is discarded first. */
206 float tmp_f = fabsf(s0->w);
/* Zero yields HUGE_VAL; an input of exactly 1 short-circuits to 1 without calling sqrtf. */
207 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
208 VSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
209 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "sge": per component, d is 1.0 where s0 >= s1 and
 * 0.0 otherwise. */
212 static void vshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
213 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
214 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
215 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
216 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
217 VSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
218 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "slt": per component, d is 1.0 where s0 < s1 and
 * 0.0 otherwise. */
221 static void vshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
222 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
223 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
224 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
225 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
226 VSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
227 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "sub": component-wise difference, d = s0 - s1. */
230 static void vshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
231 d->x = s0->x - s1->x;
232 d->y = s0->y - s1->y;
233 d->z = s0->z - s1->z;
234 d->w = s0->w - s1->w;
235 VSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
236 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
240 * Version 1.1 specific
/* Software VM handler for "exp": 2 raised to s0->w, written to all four
 * components of d. */
243 static void vshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
244 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
245 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
246 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "log": base-2 logarithm of |s0->w|, written to all
 * four components of d. The computation is the same expression vshader_logp uses. */
249 static void vshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
/* The sign of the input is discarded before taking the logarithm. */
250 float tmp_f = fabsf(s0->w);
/* log2 computed as ln(x)/ln(2); a zero input yields -HUGE_VAL instead of calling logf(0). */
251 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
252 VSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
253 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM handler for "frc": fractional part (value minus its floor).
 * Only the x and y assignments are visible in this excerpt; the handling of
 * z and w is not shown here. */
256 static void vshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
257 d->x = s0->x - floorf(s0->x);
258 d->y = s0->y - floorf(s0->y);
261 VSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
262 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Fixed-size FLOAT matrix types used as the matrix operand of the vshader_mNxM
 * handlers. The two digits in each name are the two array dimensions, in
 * declaration order. */
265 typedef FLOAT D3DMATRIX44[4][4];
266 typedef FLOAT D3DMATRIX43[4][3];
267 typedef FLOAT D3DMATRIX34[3][4];
268 typedef FLOAT D3DMATRIX33[3][3];
269 typedef FLOAT D3DMATRIX23[2][3];
/* Software VM handler for "m4x4": multiplies the 4x4 matrix mat by the vector
 * s0; each component of d is the dot product of one row of mat with s0. */
271 static void vshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
/* NOTE(review): the following lines that use mat1..mat4 refer to a parameter
 * that exists only in the commented-out part of the signature; they appear to
 * be a disabled alternative kept for reference. Confirm the comment delimiters
 * in the full file. */
273 * Buggy CODE: here only if cast not work for copy/paste
274 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
275 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
276 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
277 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
278 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
279 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
280 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
/* Version using the D3DMATRIX44 parameter: row i of mat dotted with s0 gives component i of d. */
282 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
283 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
284 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
285 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
/* One trace line per matrix row, with the matching source and result component. */
286 VSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
287 VSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
288 VSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
289 VSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
/* Software VM handler for "m4x3": x, y and z of d are the dot products of the
 * three four-element rows of mat with s0. The write to d->w, if any, is not
 * visible in this excerpt. */
292 static void vshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
293 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
294 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
295 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
/* One trace line per matrix row; the fourth line only reports the w components. */
297 VSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
298 VSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
299 VSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
300 VSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
/* Software VM handler for "m3x4": each of the four components of d is the dot
 * product of one three-element row of mat with (s0->x, s0->y, s0->z).
 * s0->w is not used in the computation. */
303 static void vshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
304 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
305 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
306 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
307 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
/* One trace line per matrix row. */
308 VSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
309 VSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
310 VSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
311 VSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
/* Software VM handler for "m3x3": x, y and z of d are the dot products of the
 * three rows of mat with (s0->x, s0->y, s0->z). The write to d->w, if any, is
 * not visible in this excerpt. */
314 static void vshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
315 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
316 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
317 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
/* One trace line per matrix row; the fourth line only reports d->w. */
319 VSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
320 VSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
321 VSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
322 VSTRACE(("executing m3x3(4): (%f) \n", d->w));
/* Software VM handler for "m3x2": x and y of d are the dot products of the two
 * rows of mat with (s0->x, s0->y, s0->z). Any further statements (z/w handling,
 * tracing) are not visible in this excerpt. */
325 static void vshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
327 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
328 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
334 * Version 2.0 specific
/* Software VM handler for "lrp": component-wise linear interpolation,
 * d = s0 * (s1 - s2) + s2, i.e. s0 selects between s2 (s0 == 0) and s1 (s0 == 1). */
336 static void vshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
337 d->x = s0->x * (s1->x - s2->x) + s2->x;
338 d->y = s0->y * (s1->y - s2->y) + s2->y;
339 d->z = s0->z * (s1->z - s2->z) + s2->z;
340 d->w = s0->w * (s1->w - s2->w) + s2->w;
/* Software VM handler for "crs": three-component cross product s0 x s1 written
 * to d->x, d->y and d->z. */
343 static void vshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
344 d->x = s0->y * s1->z - s0->z * s1->y;
345 d->y = s0->z * s1->x - s0->x * s1->z;
346 d->z = s0->x * s1->y - s0->y * s1->x;
347 d->w = 0.9f; /* w is undefined, so set it to something safeish */
349 VSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
350 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM handler for the "abs" opcode: d is the destination, s0 the source.
 * Only the signature and the trace call are visible in this excerpt; the
 * statements that write d are not shown here. */
353 static void vshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
359 VSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
360 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
365 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
366 static void vshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
370 static void vshader_call(WINED3DSHADERVECTOR* d) {
374 static void vshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
378 static void vshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
382 static void vshader_ret(void) {
386 static void vshader_endloop(void) {
390 static void vshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
394 static void vshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
398 static void vshader_sgn(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
402 static void vshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
406 static void vshader_sincos3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
410 static void vshader_sincos2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
414 static void vshader_rep(WINED3DSHADERVECTOR* d) {
418 static void vshader_endrep(void) {
422 static void vshader_if(WINED3DSHADERVECTOR* d) {
426 static void vshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
430 static void vshader_else(void) {
434 static void vshader_label(WINED3DSHADERVECTOR* d) {
438 static void vshader_endif(void) {
442 static void vshader_break(void) {
446 static void vshader_breakc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
450 static void vshader_breakp(WINED3DSHADERVECTOR* d) {
454 static void vshader_mova(WINED3DSHADERVECTOR* d) {
458 static void vshader_defb(WINED3DSHADERVECTOR* d) {
462 static void vshader_defi(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
466 static void vshader_setp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
470 static void vshader_texldl(WINED3DSHADERVECTOR* d) {
/* Forward declarations: both generators are referenced by the opcode table
 * before their definitions appear later in this file. */
475 static void vshader_hw_map2gl(SHADER_OPCODE_ARG* arg);
476 static void vshader_hw_mnxn(SHADER_OPCODE_ARG* arg);
479 * log, exp, frc, m*x* seems to be macros ins ... to see
/* Vertex shader opcode table, terminated by an all-zero sentinel row.
 * Each row holds, in order: the D3DSIO_* opcode, the D3D mnemonic, the ARB
 * program mnemonic (NULL or GLNAME_REQUIRE_GLSL when there is none), two small
 * integers (the second is the parameter count read as ->num_params by
 * vshader_hw_map2gl), the software-VM handler, the ARB code generator, the
 * GLSL code generator and two shader-version bounds.
 * NOTE(review): the column meanings are inferred from the rows and from how
 * vshader_hw_map2gl reads ->opcode, ->glname and ->num_params; confirm against
 * the SHADER_OPCODE declaration.
 * Row order matters: vshader_hw_mnxn indexes this array directly with
 * D3DSIO_DP3 / D3DSIO_DP4, so the leading rows must stay in opcode order. */
481 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
484 {D3DSIO_NOP, "nop", "NOP", 0, 0, vshader_nop, vshader_hw_map2gl, NULL, 0, 0},
485 {D3DSIO_MOV, "mov", "MOV", 1, 2, vshader_mov, vshader_hw_map2gl, shader_glsl_mov, 0, 0},
486 {D3DSIO_ADD, "add", "ADD", 1, 3, vshader_add, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
487 {D3DSIO_SUB, "sub", "SUB", 1, 3, vshader_sub, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
488 {D3DSIO_MAD, "mad", "MAD", 1, 4, vshader_mad, vshader_hw_map2gl, shader_glsl_mad, 0, 0},
489 {D3DSIO_MUL, "mul", "MUL", 1, 3, vshader_mul, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
490 {D3DSIO_RCP, "rcp", "RCP", 1, 2, vshader_rcp, vshader_hw_map2gl, shader_glsl_rcp, 0, 0},
491 {D3DSIO_RSQ, "rsq", "RSQ", 1, 2, vshader_rsq, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
492 {D3DSIO_DP3, "dp3", "DP3", 1, 3, vshader_dp3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
493 {D3DSIO_DP4, "dp4", "DP4", 1, 3, vshader_dp4, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
494 {D3DSIO_MIN, "min", "MIN", 1, 3, vshader_min, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
495 {D3DSIO_MAX, "max", "MAX", 1, 3, vshader_max, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
496 {D3DSIO_SLT, "slt", "SLT", 1, 3, vshader_slt, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
497 {D3DSIO_SGE, "sge", "SGE", 1, 3, vshader_sge, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
498 {D3DSIO_ABS, "abs", "ABS", 1, 2, vshader_abs, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
499 {D3DSIO_EXP, "exp", "EX2", 1, 2, vshader_exp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
500 {D3DSIO_LOG, "log", "LG2", 1, 2, vshader_log, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
501 {D3DSIO_EXPP, "expp", "EXP", 1, 2, vshader_expp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
502 {D3DSIO_LOGP, "logp", "LOG", 1, 2, vshader_logp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
503 {D3DSIO_LIT, "lit", "LIT", 1, 2, vshader_lit, vshader_hw_map2gl, shader_glsl_lit, 0, 0},
504 {D3DSIO_DST, "dst", "DST", 1, 3, vshader_dst, vshader_hw_map2gl, shader_glsl_dst, 0, 0},
505 {D3DSIO_LRP, "lrp", "LRP", 1, 4, vshader_lrp, NULL, shader_glsl_lrp, 0, 0},
506 {D3DSIO_FRC, "frc", "FRC", 1, 2, vshader_frc, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
507 {D3DSIO_POW, "pow", "POW", 1, 3, vshader_pow, NULL, shader_glsl_map2gl, 0, 0},
508 {D3DSIO_CRS, "crs", "XPS", 1, 3, vshader_crs, NULL, shader_glsl_map2gl, 0, 0},
509 /* TODO: sng can possibly be performed a s
512 {D3DSIO_SGN, "sgn", NULL, 1, 2, vshader_sgn, NULL, shader_glsl_map2gl, 0, 0},
513 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
516 MUL vec.xyz, vec, tmp;
517 but I think this is better because it accounts for w properly.
523 {D3DSIO_NRM, "nrm", NULL, 1, 2, vshader_nrm, NULL, shader_glsl_map2gl, 0, 0},
524 {D3DSIO_SINCOS, "sincos", NULL, 1, 4, vshader_sincos2, NULL, shader_glsl_sincos, D3DVS_VERSION(2,0), D3DVS_VERSION(2,0)},
525 {D3DSIO_SINCOS, "sincos", NULL, 1, 2, vshader_sincos3, NULL, shader_glsl_sincos, D3DVS_VERSION(3,0), -1},
528 {D3DSIO_M4x4, "m4x4", "undefined", 1, 3, vshader_m4x4, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
529 {D3DSIO_M4x3, "m4x3", "undefined", 1, 3, vshader_m4x3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
530 {D3DSIO_M3x4, "m3x4", "undefined", 1, 3, vshader_m3x4, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
531 {D3DSIO_M3x3, "m3x3", "undefined", 1, 3, vshader_m3x3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
532 {D3DSIO_M3x2, "m3x2", "undefined", 1, 3, vshader_m3x2, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
534 /* Declare registers */
535 {D3DSIO_DCL, "dcl", NULL, 0, 2, vshader_dcl, NULL, NULL, 0, 0},
537 /* Constant definitions */
538 {D3DSIO_DEF, "def", NULL, 1, 5, vshader_def, shader_hw_def, shader_glsl_def, 0, 0},
539 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 1, 2, vshader_defb, NULL, shader_glsl_defb, 0, 0},
540 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 1, 5, vshader_defi, NULL, shader_glsl_defi, 0, 0},
542 /* Flow control - requires GLSL or software shaders */
543 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 0, 1, vshader_rep, NULL, NULL, 0, 0},
544 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, 0, vshader_endrep, NULL, NULL, 0, 0},
545 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 0, 1, vshader_if, NULL, NULL, 0, 0},
546 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 0, 2, vshader_ifc, NULL, NULL, 0, 0},
547 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 0, 0, vshader_else, NULL, NULL, 0, 0},
548 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 0, 0, vshader_endif, NULL, NULL, 0, 0},
549 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 0, 0, vshader_break, NULL, NULL, 0, 0},
550 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 0, 2, vshader_breakc, NULL, NULL, 0, 0},
551 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 0, 1, vshader_breakp, NULL, NULL, 0, 0},
552 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 0, 1, vshader_call, NULL, NULL, 0, 0},
553 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 0, 2, vshader_callnz, NULL, NULL, 0, 0},
554 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 0, 2, vshader_loop, NULL, shader_glsl_loop, 0, 0},
555 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, 0, vshader_ret, NULL, NULL, 0, 0},
556 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, 0, vshader_endloop, NULL, shader_glsl_endloop, 0, 0},
557 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 0, 1, vshader_label, NULL, NULL, 0, 0},
559 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 1, 2, vshader_mova, NULL, shader_glsl_mov, 0, 0},
560 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 1, 3, vshader_setp, NULL, NULL, 0, 0},
/* Mnemonic corrected from "texdl" to the D3D assembly name "texldl". */
561 {D3DSIO_TEXLDL, "texldl", GLNAME_REQUIRE_GLSL, 1, 2, vshader_texldl, NULL, NULL, 0, 0},
/* Sentinel row: spelled out with the same ten columns as every other row. */
562 {0, NULL, NULL, 0, 0, NULL, NULL, NULL, 0, 0}
/* Appends the write mask of a destination parameter token to hwLine.
 *   param    - destination parameter token; only its D3DSP_WRITEMASK_* bits are read here
 *   is_color - not used by the statements visible in this excerpt
 *   hwLine   - NUL-terminated line being built; text is appended with strcat,
 *              so the caller must provide enough room */
565 inline static void vshader_program_add_output_param_swizzle(const DWORD param, int is_color, char *hwLine) {
566 /** operand output */
/* Nothing is appended when all four components are written. */
567 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
/* One letter per enabled component, always in x, y, z, w order. */
569 if (param & D3DSP_WRITEMASK_0) { strcat(hwLine, "x"); }
570 if (param & D3DSP_WRITEMASK_1) { strcat(hwLine, "y"); }
571 if (param & D3DSP_WRITEMASK_2) { strcat(hwLine, "z"); }
572 if (param & D3DSP_WRITEMASK_3) { strcat(hwLine, "w"); }
/* Appends the source swizzle of a parameter token to hwLine.
 *   param    - source parameter token; its D3DVS_SWIZZLE_MASK field is decoded
 *   is_color - presumably selects the colour-fix letter table (the condition
 *              lines are not visible in this excerpt — confirm)
 *   hwLine   - NUL-terminated line being built; text is appended with strcat
 * The declaration of the scratch buffer tmpReg and several branch lines are not
 * visible in this excerpt. */
576 inline static void vshader_program_add_input_param_swizzle(const DWORD param, int is_color, char *hwLine) {
/* Letter tables indexed by a 2-bit component selector; the colour-fix table
 * exchanges the letters for selectors 0 and 2 (x and z). */
577 static const char swizzle_reg_chars_color_fix[] = "zyxw";
578 static const char swizzle_reg_chars[] = "xyzw";
579 const char* swizzle_regs = NULL;
/* Extract the swizzle field and split it into four 2-bit selectors, lowest bits first. */
583 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
584 DWORD swizzle_x = swizzle & 0x03;
585 DWORD swizzle_y = (swizzle >> 2) & 0x03;
586 DWORD swizzle_z = (swizzle >> 4) & 0x03;
587 DWORD swizzle_w = (swizzle >> 6) & 0x03;
/* Choose which letter table to use (the selecting condition is not visible here). */
590 swizzle_regs = swizzle_reg_chars_color_fix;
592 swizzle_regs = swizzle_reg_chars;
596 * swizzle bits fields:
/* Identity swizzle case. NOTE(review): a guard between this test and the
 * sprintf below appears to be missing from this excerpt; confirm when the
 * four-letter form is emitted for an identity swizzle. */
599 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) { /* D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
601 sprintf(tmpReg, ".%c%c%c%c",
602 swizzle_regs[swizzle_x],
603 swizzle_regs[swizzle_y],
604 swizzle_regs[swizzle_z],
605 swizzle_regs[swizzle_w]);
606 strcat(hwLine, tmpReg);
/* All four selectors equal: emit the short single-letter form. */
610 if (swizzle_x == swizzle_y &&
611 swizzle_x == swizzle_z &&
612 swizzle_x == swizzle_w)
614 sprintf(tmpReg, ".%c", swizzle_regs[swizzle_x]);
615 strcat(hwLine, tmpReg);
/* Otherwise emit the full four-letter swizzle. */
617 sprintf(tmpReg, ".%c%c%c%c",
618 swizzle_regs[swizzle_x],
619 swizzle_regs[swizzle_y],
620 swizzle_regs[swizzle_z],
621 swizzle_regs[swizzle_w]);
622 strcat(hwLine, tmpReg);
/* Appends the textual ARB-program form of one instruction parameter to hwLine.
 *   arg      - current opcode context; arg->shader and arg->reg_maps are read
 *   param    - parameter token (register number, register type, modifiers)
 *   is_input - presumably selects between the input swizzle and the output
 *              write mask at the end (the condition line is not visible — confirm)
 *   hwLine   - NUL-terminated line being built; text is appended with strcat
 * The switch statement, most case labels and the tmpReg declaration are not
 * visible in this excerpt, so the grouping notes below are partly inferred. */
626 inline static void vshader_program_add_param(SHADER_OPCODE_ARG *arg, const DWORD param, BOOL is_input, char *hwLine) {
628 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)arg->shader;
630 /* oPos, oFog and oPts in D3D */
631 static const char* hwrastout_reg_names[] = { "result.position", "result.fogcoord", "result.pointsize" };
/* Split the token into register number and register type. */
633 DWORD reg = param & D3DSP_REGNUM_MASK;
634 DWORD regtype = shader_get_regtype(param);
636 BOOL is_color = FALSE;
/* A negate source modifier is emitted as a leading " -". */
638 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) {
639 strcat(hwLine, " -");
/* Register named "R<n>". */
646 sprintf(tmpReg, "R%lu", reg);
647 strcat(hwLine, tmpReg);
/* Tests whether reg is the register recorded for the diffuse / specular usage
 * (presumably to set is_color; the assignments are not visible here). */
651 if (This->arrayUsageMap[WINED3DSHADERDECLUSAGE_DIFFUSE] &&
652 reg == (This->arrayUsageMap[WINED3DSHADERDECLUSAGE_DIFFUSE] & D3DSP_REGNUM_MASK))
655 if (This->arrayUsageMap[WINED3DSHADERDECLUSAGE_SPECULAR] &&
656 reg == (This->arrayUsageMap[WINED3DSHADERDECLUSAGE_SPECULAR] & D3DSP_REGNUM_MASK))
659 /* FIXME: Shaders in 8.1 appear to not require a dcl statement - use
660 * the reg value from the vertex declaration. However, arrayUsageMap is not initialized
661 * in that case - how can we know if an input contains color data or not? */
/* Vertex attribute named "vertex.attrib[<n>]". */
663 sprintf(tmpReg, "vertex.attrib[%lu]", reg);
664 strcat(hwLine, tmpReg);
667 /* FIXME: some constants are named so we need a constants map*/
/* Constants flagged in reg_maps->constantsF are emitted as "C<n>"; all others
 * index the C[] array, with an "A0.x + " prefix for relative addressing. */
668 if (arg->reg_maps->constantsF[reg]) {
669 if (param & D3DVS_ADDRMODE_RELATIVE) {
670 FIXME("Relative addressing not expected for a named constant %lu\n", reg);
672 sprintf(tmpReg, "C%lu", reg);
674 sprintf(tmpReg, "C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "A0.x + " : "", reg);
676 strcat(hwLine, tmpReg);
678 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
679 sprintf(tmpReg, "A%lu", reg);
680 strcat(hwLine, tmpReg);
/* Name looked up in hwrastout_reg_names. NOTE(review): reg is used as the
 * index without a visible bounds check against the three entries. */
683 sprintf(tmpReg, "%s", hwrastout_reg_names[reg]);
684 strcat(hwLine, tmpReg);
/* Colour outputs: primary or secondary (the selecting condition is not visible). */
688 strcat(hwLine, "result.color.primary");
690 strcat(hwLine, "result.color.secondary");
693 case D3DSPR_TEXCRDOUT:
694 sprintf(tmpReg, "result.texcoord[%lu]", reg);
695 strcat(hwLine, tmpReg);
/* Any other register type is reported and replaced by a placeholder name. */
698 FIXME("Unknown reg type %ld %ld\n", regtype, reg);
699 strcat(hwLine, "unrecognized_register");
/* Finally append either the write mask or the source swizzle. */
704 vshader_program_add_output_param_swizzle(param, is_color, hwLine);
706 vshader_program_add_input_param_swizzle(param, is_color, hwLine);
/* Fills This->baseShader.limits according to This->baseShader.hex_version.
 * The texcoord, attributes, packed_input and constant_float limits are the
 * same for every version; the remaining fields are set per version group. */
710 static void vshader_set_limits(
711 IWineD3DVertexShaderImpl *This) {
/* Version-independent limits. */
713 This->baseShader.limits.texcoord = 0;
714 This->baseShader.limits.attributes = 16;
715 This->baseShader.limits.packed_input = 0;
717 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
718 This->baseShader.limits.constant_float = WINED3D_VSHADER_MAX_CONSTANTS;
720 switch (This->baseShader.hex_version) {
/* vs_1_0 / vs_1_1: no bool or int constants, no samplers. */
721 case D3DVS_VERSION(1,0):
722 case D3DVS_VERSION(1,1):
723 This->baseShader.limits.temporary = 12;
724 This->baseShader.limits.constant_bool = 0;
725 This->baseShader.limits.constant_int = 0;
726 This->baseShader.limits.address = 1;
727 This->baseShader.limits.packed_output = 0;
728 This->baseShader.limits.sampler = 0;
/* vs_2_0 / vs_2_1: adds 16 bool and 16 int constants. */
731 case D3DVS_VERSION(2,0):
732 case D3DVS_VERSION(2,1):
733 This->baseShader.limits.temporary = 12;
734 This->baseShader.limits.constant_bool = 16;
735 This->baseShader.limits.constant_int = 16;
736 This->baseShader.limits.address = 1;
737 This->baseShader.limits.packed_output = 0;
738 This->baseShader.limits.sampler = 0;
/* vs_3_0: more temporaries and constants, packed outputs and samplers. */
741 case D3DVS_VERSION(3,0):
742 This->baseShader.limits.temporary = 32;
743 This->baseShader.limits.constant_bool = 32;
744 This->baseShader.limits.constant_int = 32;
745 This->baseShader.limits.address = 1;
746 This->baseShader.limits.packed_output = 12;
747 This->baseShader.limits.sampler = 4;
/* Unknown version: use the same values as the 2.x group and log a FIXME. */
750 default: This->baseShader.limits.temporary = 12;
751 This->baseShader.limits.constant_bool = 16;
752 This->baseShader.limits.constant_int = 16;
753 This->baseShader.limits.address = 1;
754 This->baseShader.limits.packed_output = 0;
755 This->baseShader.limits.sampler = 0;
756 FIXME("Unrecognized vertex shader version %#lx\n",
757 This->baseShader.hex_version);
761 /* Map the opcode 1-to-1 to the GL code */
/* Builds one instruction line from the opcode's glname followed by its
 * destination and source parameters, then adds it to arg->buffer.
 * The declarations of tmpLine and i are not visible in this excerpt. */
762 static void vshader_hw_map2gl(SHADER_OPCODE_ARG* arg) {
764 CONST SHADER_OPCODE* curOpcode = arg->opcode;
765 SHADER_BUFFER* buffer = arg->buffer;
766 DWORD dst = arg->dst;
767 DWORD* src = arg->src;
769 DWORD dst_regtype = shader_get_regtype(dst);
/* A mov whose destination is an address register is emitted as "ARL";
 * every other opcode uses its table glname. */
773 if (curOpcode->opcode == D3DSIO_MOV && dst_regtype == D3DSPR_ADDR)
774 strcpy(tmpLine, "ARL");
/* NOTE(review): glname is copied without a NULL check; this presumably relies
 * on only rows with a real glname naming this function in the opcode table. */
776 strcpy(tmpLine, curOpcode->glname);
/* Parameter 0 is the destination; parameters 1..num_params-1 are src[0..],
 * each preceded by a comma. */
778 if (curOpcode->num_params > 0) {
779 vshader_program_add_param(arg, dst, FALSE, tmpLine);
780 for (i = 1; i < curOpcode->num_params; ++i) {
781 strcat(tmpLine, ",");
782 vshader_program_add_param(arg, src[i-1], TRUE, tmpLine);
/* Emit the finished line, terminated with ';'. */
785 shader_addline(buffer, "%s;\n", tmpLine);
788 /** Handles transforming all D3DSIO_M?x? opcodes for
789 Vertex shaders to ARB_vertex_program codes */
/* Expands one matrix opcode into nComponents dot-product instructions, each
 * emitted through vshader_hw_map2gl with a temporary argument block.
 * The declarations of i and nComponents, the case labels and the nComponents
 * assignments are not visible in this excerpt. */
790 static void vshader_hw_mnxn(SHADER_OPCODE_ARG* arg) {
794 SHADER_OPCODE_ARG tmpArg;
796 /* Set constants for the temporary argument */
797 tmpArg.shader = arg->shader;
798 tmpArg.buffer = arg->buffer;
799 tmpArg.src[0] = arg->src[0];
800 tmpArg.reg_maps = arg->reg_maps;
/* Pick the dp4 or dp3 table row for the matrix opcode being expanded.
 * NOTE(review): the table is indexed with the opcode constant itself, which
 * assumes D3DSIO_DP3 / D3DSIO_DP4 equal the row positions of dp3 / dp4 in
 * IWineD3DVertexShaderImpl_shader_ins — confirm. */
802 switch(arg->opcode->opcode) {
805 tmpArg.opcode = &IWineD3DVertexShaderImpl_shader_ins[D3DSIO_DP4];
809 tmpArg.opcode = &IWineD3DVertexShaderImpl_shader_ins[D3DSIO_DP4];
813 tmpArg.opcode = &IWineD3DVertexShaderImpl_shader_ins[D3DSIO_DP3];
817 tmpArg.opcode = &IWineD3DVertexShaderImpl_shader_ins[D3DSIO_DP3];
821 tmpArg.opcode = &IWineD3DVertexShaderImpl_shader_ins[D3DSIO_DP3];
/* Iteration i writes only destination component i (the write mask is replaced
 * by a single bit) and adds i to the second source token. */
827 for (i = 0; i < nComponents; i++) {
828 tmpArg.dst = ((arg->dst) & ~D3DSP_WRITEMASK_ALL)|(D3DSP_WRITEMASK_0<<i);
829 tmpArg.src[1] = arg->src[1]+i;
830 vshader_hw_map2gl(&tmpArg);
834 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
835 or GLSL and send it to the card */
836 inline static VOID IWineD3DVertexShaderImpl_GenerateShader(
837 IWineD3DVertexShader *iface,
838 CONST DWORD *pFunction) {
840 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
841 SHADER_BUFFER buffer;
843 /* First pass: figure out which registers are used, what the semantics are, etc.. */
844 shader_reg_maps reg_maps;
845 DWORD semantics_out[WINED3DSHADERDECLUSAGE_MAX_USAGE];
847 memset(®_maps, 0, sizeof(shader_reg_maps));
848 memset(semantics_out, 0, WINED3DSHADERDECLUSAGE_MAX_USAGE * sizeof(DWORD));
849 reg_maps.semantics_in = This->arrayUsageMap;
850 reg_maps.semantics_out = semantics_out;
851 shader_get_registers_used((IWineD3DBaseShader*) This, ®_maps, pFunction);
852 /* FIXME: validate against OpenGL */
854 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
855 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
856 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
857 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
858 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
859 This->fixupVertexBufferSize = PGMSIZE;
860 This->fixupVertexBuffer[0] = 0;
862 buffer.buffer = This->device->fixupVertexBuffer;
864 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
869 if (wined3d_settings.shader_mode == SHADER_GLSL) {
871 /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
872 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
874 /* Base Declarations */
875 shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, ®_maps, &buffer);
877 /* Base Shader Body */
878 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, ®_maps, pFunction);
880 /* Unpack 3.0 outputs */
881 if (This->baseShader.hex_version >= D3DVS_VERSION(3,0))
882 vshader_glsl_output_unpack(&buffer, semantics_out);
884 shader_addline(&buffer, "}\n\0");
886 TRACE("Compiling shader object %u\n", shader_obj);
887 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
888 GL_EXTCALL(glCompileShaderARB(shader_obj));
889 print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
891 /* Store the shader object */
892 This->baseShader.prgId = shader_obj;
894 } else if (wined3d_settings.shader_mode == SHADER_ARB) {
896 /* Create the hw ARB shader */
897 shader_addline(&buffer, "!!ARBvp1.0\n");
899 /* Mesa supports only 95 constants */
900 if (GL_VEND(MESA) || GL_VEND(WINE))
901 This->baseShader.limits.constant_float =
902 min(95, This->baseShader.limits.constant_float);
904 /* Base Declarations */
905 shader_generate_arb_declarations( (IWineD3DBaseShader*) This, ®_maps, &buffer);
907 /* Base Shader Body */
908 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, ®_maps, pFunction);
910 shader_addline(&buffer, "END\n\0");
912 /* TODO: change to resource.glObjectHandle or something like that */
913 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
915 TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
916 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
918 TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
919 /* Create the program and check for errors */
920 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
921 buffer.bsize, buffer.buffer));
923 if (glGetError() == GL_INVALID_OPERATION) {
925 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
926 FIXME("HW VertexShader Error at position %d: %s\n",
927 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
928 This->baseShader.prgId = -1;
932 #if 1 /* if were using the data buffer of device then we don't need to free it */
933 HeapFree(GetProcessHeap(), 0, buffer.buffer);
937 BOOL IWineD3DVertexShaderImpl_ExecuteHAL(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
939 * TODO: use the NV_vertex_program (or 1_1) extension
940 * and specifics vendors (ARB_vertex_program??) variants for it
945 HRESULT WINAPI IWineD3DVertexShaderImpl_ExecuteSW(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
946 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
949 /** Vertex Shader Temporary Registers */
950 WINED3DSHADERVECTOR R[12];
951 /*D3DSHADERSCALAR A0;*/
952 WINED3DSHADERVECTOR A[1];
953 /** temporary Vector for modifier management */
954 WINED3DSHADERVECTOR d;
955 WINED3DSHADERVECTOR s[3];
957 const DWORD* pToken = This->baseShader.function;
958 const SHADER_OPCODE* curOpcode = NULL;
959 /** functions parameters */
960 WINED3DSHADERVECTOR* p[6];
961 WINED3DSHADERVECTOR* p_send[6];
964 /** init temporary register */
965 memset(R, 0, 12 * sizeof(WINED3DSHADERVECTOR));
967 /* vshader_program_parse(vshader); */
968 #if 0 /* Must not be 1 in cvs */
970 TRACE_VSVECTOR(This->data->C[0]);
971 TRACE_VSVECTOR(This->data->C[1]);
972 TRACE_VSVECTOR(This->data->C[2]);
973 TRACE_VSVECTOR(This->data->C[3]);
974 TRACE_VSVECTOR(This->data->C[4]);
975 TRACE_VSVECTOR(This->data->C[5]);
976 TRACE_VSVECTOR(This->data->C[6]);
977 TRACE_VSVECTOR(This->data->C[7]);
978 TRACE_VSVECTOR(This->data->C[8]);
979 TRACE_VSVECTOR(This->data->C[64]);
980 TRACE_VSVECTOR(input->V[D3DVSDE_POSITION]);
981 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDWEIGHT]);
982 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDINDICES]);
983 TRACE_VSVECTOR(input->V[D3DVSDE_NORMAL]);
984 TRACE_VSVECTOR(input->V[D3DVSDE_PSIZE]);
985 TRACE_VSVECTOR(input->V[D3DVSDE_DIFFUSE]);
986 TRACE_VSVECTOR(input->V[D3DVSDE_SPECULAR]);
987 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD0]);
988 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD1]);
991 TRACE_VSVECTOR(vshader->data->C[64]);
992 /* TODO: Run through all the tokens and find and labels, if, endifs, loops etc...., and make a labels list */
994 /* the first dword is the version tag */
997 if (shader_is_vshader_version(*pToken)) { /** version */
1000 while (D3DVS_END() != *pToken) {
1001 if (shader_is_comment(*pToken)) { /** comment */
1002 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1004 pToken += comment_len;
1008 opcode_token = *pToken++;
1009 curOpcode = shader_get_opcode((IWineD3DBaseShader*) This, opcode_token);
1011 if (NULL == curOpcode) {
1012 FIXME("Unrecognized opcode: token=%08lX\n", opcode_token);
1013 pToken += shader_skip_unrecognized((IWineD3DBaseShader*) This, pToken);
1017 if (curOpcode->num_params > 0) {
1018 /* TRACE(">> execting opcode: pos=%d opcode_name=%s token=%08lX\n", pToken - vshader->function, curOpcode->name, *pToken); */
1019 for (i = 0; i < curOpcode->num_params; ++i) {
1020 DWORD reg = pToken[i] & D3DSP_REGNUM_MASK;
1021 DWORD regtype = shader_get_regtype(pToken[i]);
1025 /* TRACE("p[%d]=R[%d]\n", i, reg); */
1029 /* TRACE("p[%d]=V[%s]\n", i, VertexShaderDeclRegister[reg]); */
1030 p[i] = &input->V[reg];
1033 if (pToken[i] & D3DVS_ADDRMODE_RELATIVE) {
1034 p[i] = &This->data->C[(DWORD) A[0].x + reg];
1036 p[i] = &This->data->C[reg];
1039 case D3DSPR_ADDR: /* case D3DSPR_TEXTURE: */
1041 ERR("cannot handle address registers != a0, forcing use of a0\n");
1044 /* TRACE("p[%d]=A[%d]\n", i, reg); */
1047 case D3DSPR_RASTOUT:
1049 case D3DSRO_POSITION:
1050 p[i] = &output->oPos;
1053 p[i] = &output->oFog;
1055 case D3DSRO_POINT_SIZE:
1056 p[i] = &output->oPts;
1060 case D3DSPR_ATTROUT:
1061 /* TRACE("p[%d]=oD[%d]\n", i, reg); */
1062 p[i] = &output->oD[reg];
1064 case D3DSPR_TEXCRDOUT:
1065 /* TRACE("p[%d]=oT[%d]\n", i, reg); */
1066 p[i] = &output->oT[reg];
1068 /* TODO Decls and defs */
1077 if (i > 0) { /* input reg */
1078 DWORD swizzle = (pToken[i] & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
1079 UINT isNegative = ((pToken[i] & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG);
1081 if (!isNegative && (D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) {
1082 /* TRACE("p[%d] not swizzled\n", i); */
1085 DWORD swizzle_x = swizzle & 0x03;
1086 DWORD swizzle_y = (swizzle >> 2) & 0x03;
1087 DWORD swizzle_z = (swizzle >> 4) & 0x03;
1088 DWORD swizzle_w = (swizzle >> 6) & 0x03;
1089 /* TRACE("p[%d] swizzled\n", i); */
1090 float* tt = (float*) p[i];
1091 s[i].x = (isNegative) ? -tt[swizzle_x] : tt[swizzle_x];
1092 s[i].y = (isNegative) ? -tt[swizzle_y] : tt[swizzle_y];
1093 s[i].z = (isNegative) ? -tt[swizzle_z] : tt[swizzle_z];
1094 s[i].w = (isNegative) ? -tt[swizzle_w] : tt[swizzle_w];
1097 } else { /* output reg */
1098 if ((pToken[i] & D3DSP_WRITEMASK_ALL) == D3DSP_WRITEMASK_ALL) {
1101 p_send[i] = &d; /* to be post-processed for modifiers management */
1107 switch (curOpcode->num_params) {
1109 curOpcode->soft_fct();
1112 curOpcode->soft_fct(p_send[0]);
1115 curOpcode->soft_fct(p_send[0], p_send[1]);
1118 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2]);
1121 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3]);
1124 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4]);
1127 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4], p_send[5]);
1130 ERR("%s too many params: %u\n", curOpcode->name, curOpcode->num_params);
1133 /* check if output reg modifier post-process */
1134 if (curOpcode->num_params > 0 && (pToken[0] & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1135 if (pToken[0] & D3DSP_WRITEMASK_0) p[0]->x = d.x;
1136 if (pToken[0] & D3DSP_WRITEMASK_1) p[0]->y = d.y;
1137 if (pToken[0] & D3DSP_WRITEMASK_2) p[0]->z = d.z;
1138 if (pToken[0] & D3DSP_WRITEMASK_3) p[0]->w = d.w;
1141 TRACE_VSVECTOR(output->oPos);
1142 TRACE_VSVECTOR(output->oD[0]);
1143 TRACE_VSVECTOR(output->oD[1]);
1144 TRACE_VSVECTOR(output->oT[0]);
1145 TRACE_VSVECTOR(output->oT[1]);
1146 TRACE_VSVECTOR(R[0]);
1147 TRACE_VSVECTOR(R[1]);
1148 TRACE_VSVECTOR(R[2]);
1149 TRACE_VSVECTOR(R[3]);
1150 TRACE_VSVECTOR(R[4]);
1151 TRACE_VSVECTOR(R[5]);
1154 /* to next opcode token */
1155 pToken += curOpcode->num_params;
1158 TRACE("End of current instruction:\n");
1159 TRACE_VSVECTOR(output->oPos);
1160 TRACE_VSVECTOR(output->oD[0]);
1161 TRACE_VSVECTOR(output->oD[1]);
1162 TRACE_VSVECTOR(output->oT[0]);
1163 TRACE_VSVECTOR(output->oT[1]);
1164 TRACE_VSVECTOR(R[0]);
1165 TRACE_VSVECTOR(R[1]);
1166 TRACE_VSVECTOR(R[2]);
1167 TRACE_VSVECTOR(R[3]);
1168 TRACE_VSVECTOR(R[4]);
1169 TRACE_VSVECTOR(R[5]);
1172 #if 0 /* Must not be 1 in cvs */
1174 TRACE_VSVECTOR(output->oPos);
1175 TRACE_VSVECTOR(output->oD[0]);
1176 TRACE_VSVECTOR(output->oD[1]);
1177 TRACE_VSVECTOR(output->oT[0]);
1178 TRACE_VSVECTOR(output->oT[1]);
1183 /* *******************************************
1184 IWineD3DVertexShader IUnknown parts follow
1185 ******************************************* */
1186 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
1188 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1189 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
1190 if (IsEqualGUID(riid, &IID_IUnknown)
1191 || IsEqualGUID(riid, &IID_IWineD3DBase)
1192 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
1193 || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
1194 IUnknown_AddRef(iface);
1199 return E_NOINTERFACE;
1202 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
1203 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1204 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
1205 return InterlockedIncrement(&This->ref);
1208 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
1209 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1211 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
1212 ref = InterlockedDecrement(&This->ref);
1214 if (This->vertexDeclaration) IWineD3DVertexDeclaration_Release(This->vertexDeclaration);
1215 if (wined3d_settings.shader_mode == SHADER_GLSL && This->baseShader.prgId != 0) {
1216 /* If this shader is still attached to a program, GL will perform a lazy delete */
1217 TRACE("Deleting shader object %u\n", This->baseShader.prgId);
1218 GL_EXTCALL(glDeleteObjectARB(This->baseShader.prgId));
1219 checkGLcall("glDeleteObjectARB");
1221 HeapFree(GetProcessHeap(), 0, This);
1226 /* *******************************************
1227 IWineD3DVertexShader IWineD3DVertexShader parts follow
1228 ******************************************* */
1230 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
1231 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1233 *parent = This->parent;
1234 IUnknown_AddRef(*parent);
1235 TRACE("(%p) : returning %p\n", This, *parent);
1239 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
1240 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1241 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
1242 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
1243 TRACE("(%p) returning %p\n", This, *pDevice);
1247 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
1248 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
1249 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
1251 if (NULL == pData) {
1252 *pSizeOfData = This->baseShader.functionLength;
1255 if (*pSizeOfData < This->baseShader.functionLength) {
1256 *pSizeOfData = This->baseShader.functionLength;
1257 return WINED3DERR_MOREDATA;
1259 if (NULL == This->baseShader.function) { /* no function defined */
1260 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
1261 (*(DWORD **) pData) = NULL;
1263 if(This->baseShader.functionLength == 0){
1266 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
1267 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
1272 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
1274 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
1276 shader_trace_init((IWineD3DBaseShader*) This, pFunction);
1277 vshader_set_limits(This);
1279 /* Generate HW shader in needed */
1280 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW)
1281 IWineD3DVertexShaderImpl_GenerateShader(iface, pFunction);
1283 /* copy the function ... because it will certainly be released by application */
1284 if (NULL != pFunction) {
1285 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1286 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1288 This->baseShader.function = NULL;
1293 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
1295 /*** IUnknown methods ***/
1296 IWineD3DVertexShaderImpl_QueryInterface,
1297 IWineD3DVertexShaderImpl_AddRef,
1298 IWineD3DVertexShaderImpl_Release,
1299 /*** IWineD3DBase methods ***/
1300 IWineD3DVertexShaderImpl_GetParent,
1301 /*** IWineD3DBaseShader methods ***/
1302 IWineD3DVertexShaderImpl_SetFunction,
1303 /*** IWineD3DVertexShader methods ***/
1304 IWineD3DVertexShaderImpl_GetDevice,
1305 IWineD3DVertexShaderImpl_GetFunction