2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2004 Christian Costa
7 * Copyright 2005 Oliver Stieber
8 * Copyright 2006 Ivan Gyurdiev
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "wined3d_private.h"
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
36 /* Shader debugging - Change the following line to enable debugging of software
38 #if 0 /* Must not be 1 in cvs version */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
43 # define TRACE_VSVECTOR(name)
47 * DirectX9 SDK download
48 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
51 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
53 * Using Vertex Shaders
54 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
57 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
60 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
66 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
69 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
71 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72 * http://developer.nvidia.com/view.asp?IO=vstovp
74 * NVIDIA: Memory Management with VAR
75 * http://developer.nvidia.com/view.asp?IO=var_memory_management
78 /* TODO: Vertex and Pixel shaders are almost identical, the only exception being the way that some of the data is looked up or the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders;
79 because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross compiled using a large body of shared code */
81 #define GLNAME_REQUIRE_GLSL ((const char *)1)
83 /*******************************
84 * vshader functions software VM
87 static void vshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
92 VSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
93 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
96 static void vshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
97 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
98 VSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
99 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
102 static void vshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
103 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
104 VSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
105 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
108 static void vshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
110 d->y = s0->y * s1->y;
113 VSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
114 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
117 static void vshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
123 tmp.f = floorf(s0->w);
124 d->x = powf(2.0f, tmp.f);
125 d->y = s0->w - tmp.f;
126 tmp.f = powf(2.0f, s0->w);
127 tmp.d &= 0xFFFFFF00U;
130 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
131 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
134 static void vshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
136 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
137 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
139 VSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
140 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
143 static void vshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
144 float tmp_f = fabsf(s0->w);
145 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
146 VSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
147 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
150 static void vshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
151 d->x = s0->x * s1->x + s2->x;
152 d->y = s0->y * s1->y + s2->y;
153 d->z = s0->z * s1->z + s2->z;
154 d->w = s0->w * s1->w + s2->w;
155 VSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
156 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
159 static void vshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
160 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
161 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
162 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
163 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
164 VSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
165 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
168 static void vshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
169 d->x = (s0->x < s1->x) ? s0->x : s1->x;
170 d->y = (s0->y < s1->y) ? s0->y : s1->y;
171 d->z = (s0->z < s1->z) ? s0->z : s1->z;
172 d->w = (s0->w < s1->w) ? s0->w : s1->w;
173 VSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
174 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
177 static void vshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
182 VSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
183 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
186 static void vshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
187 d->x = s0->x * s1->x;
188 d->y = s0->y * s1->y;
189 d->z = s0->z * s1->z;
190 d->w = s0->w * s1->w;
191 VSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
192 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/**
 * nop: no operation. Touches no registers; only emits a trace line when
 * shader tracing is compiled in.
 */
static void vshader_nop(void) {
    VSTRACE(("executing nop\n"));
}
200 static void vshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
201 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
202 VSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
203 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
206 static void vshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
207 float tmp_f = fabsf(s0->w);
208 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
209 VSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
210 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
213 static void vshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
214 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
215 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
216 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
217 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
218 VSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
219 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
222 static void vshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
223 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
224 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
225 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
226 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
227 VSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
228 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
231 static void vshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
232 d->x = s0->x - s1->x;
233 d->y = s0->y - s1->y;
234 d->z = s0->z - s1->z;
235 d->w = s0->w - s1->w;
236 VSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
237 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
241 * Version 1.1 specific
244 static void vshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
245 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
246 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
247 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
250 static void vshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
251 float tmp_f = fabsf(s0->w);
252 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
253 VSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
254 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
257 static void vshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
258 d->x = s0->x - floorf(s0->x);
259 d->y = s0->y - floorf(s0->y);
262 VSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
263 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/*
 * Fixed-size float matrices taken by the vshader_mNxM helpers.
 * The digits in each name are the C array dimensions in declaration order
 * (D3DMATRIX43 is 4 rows of 3 floats). This is the reverse of the opcode that
 * uses it: vshader_m3x4 takes a D3DMATRIX43 and vshader_m4x3 a D3DMATRIX34.
 */
266 typedef FLOAT D3DMATRIX44[4][4];
267 typedef FLOAT D3DMATRIX43[4][3];
268 typedef FLOAT D3DMATRIX34[3][4];
269 typedef FLOAT D3DMATRIX33[3][3];
270 typedef FLOAT D3DMATRIX23[2][3];
272 static void vshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
274 * Buggy CODE: here only if cast not work for copy/paste
275 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
276 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
277 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
278 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
279 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
280 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
281 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
283 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
284 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
285 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
286 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
287 VSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
288 VSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
289 VSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
290 VSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
293 static void vshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
294 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
295 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
296 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
298 VSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
299 VSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
300 VSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
301 VSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
304 static void vshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
305 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
306 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
307 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
308 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
309 VSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
310 VSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
311 VSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
312 VSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
315 static void vshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
316 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
317 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
318 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
320 VSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
321 VSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
322 VSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
323 VSTRACE(("executing m3x3(4): (%f) \n", d->w));
326 static void vshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
328 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
329 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
335 * Version 2.0 specific
337 static void vshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
338 d->x = s0->x * (s1->x - s2->x) + s2->x;
339 d->y = s0->y * (s1->y - s2->y) + s2->y;
340 d->z = s0->z * (s1->z - s2->z) + s2->z;
341 d->w = s0->w * (s1->w - s2->w) + s2->w;
344 static void vshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
345 d->x = s0->y * s1->z - s0->z * s1->y;
346 d->y = s0->z * s1->x - s0->x * s1->z;
347 d->z = s0->x * s1->y - s0->y * s1->x;
348 d->w = 0.9f; /* w is undefined, so set it to something safeish */
350 VSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
351 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
354 static void vshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
360 VSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
361 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
366 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
367 static void vshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
371 static void vshader_call(WINED3DSHADERVECTOR* d) {
375 static void vshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
379 static void vshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
383 static void vshader_ret(void) {
387 static void vshader_endloop(void) {
391 static void vshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
395 static void vshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
399 static void vshader_sgn(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
403 static void vshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
407 static void vshader_sincos3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
411 static void vshader_sincos2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
415 static void vshader_rep(WINED3DSHADERVECTOR* d) {
419 static void vshader_endrep(void) {
423 static void vshader_if(WINED3DSHADERVECTOR* d) {
427 static void vshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
431 static void vshader_else(void) {
435 static void vshader_label(WINED3DSHADERVECTOR* d) {
439 static void vshader_endif(void) {
443 static void vshader_break(void) {
447 static void vshader_breakc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
451 static void vshader_breakp(WINED3DSHADERVECTOR* d) {
455 static void vshader_mova(WINED3DSHADERVECTOR* d) {
459 static void vshader_defb(WINED3DSHADERVECTOR* d) {
463 static void vshader_defi(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
467 static void vshader_setp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
471 static void vshader_texldl(WINED3DSHADERVECTOR* d) {
475 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
478 {D3DSIO_NOP, "nop", "NOP", 0, 0, vshader_nop, vshader_hw_map2gl, NULL, 0, 0},
479 {D3DSIO_MOV, "mov", "MOV", 1, 2, vshader_mov, vshader_hw_map2gl, shader_glsl_mov, 0, 0},
480 {D3DSIO_ADD, "add", "ADD", 1, 3, vshader_add, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
481 {D3DSIO_SUB, "sub", "SUB", 1, 3, vshader_sub, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
482 {D3DSIO_MAD, "mad", "MAD", 1, 4, vshader_mad, vshader_hw_map2gl, shader_glsl_mad, 0, 0},
483 {D3DSIO_MUL, "mul", "MUL", 1, 3, vshader_mul, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
484 {D3DSIO_RCP, "rcp", "RCP", 1, 2, vshader_rcp, vshader_hw_map2gl, shader_glsl_rcp, 0, 0},
485 {D3DSIO_RSQ, "rsq", "RSQ", 1, 2, vshader_rsq, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
486 {D3DSIO_DP3, "dp3", "DP3", 1, 3, vshader_dp3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
487 {D3DSIO_DP4, "dp4", "DP4", 1, 3, vshader_dp4, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
488 {D3DSIO_MIN, "min", "MIN", 1, 3, vshader_min, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
489 {D3DSIO_MAX, "max", "MAX", 1, 3, vshader_max, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
490 {D3DSIO_SLT, "slt", "SLT", 1, 3, vshader_slt, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
491 {D3DSIO_SGE, "sge", "SGE", 1, 3, vshader_sge, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
492 {D3DSIO_ABS, "abs", "ABS", 1, 2, vshader_abs, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
493 {D3DSIO_EXP, "exp", "EX2", 1, 2, vshader_exp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
494 {D3DSIO_LOG, "log", "LG2", 1, 2, vshader_log, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
495 {D3DSIO_EXPP, "expp", "EXP", 1, 2, vshader_expp, vshader_hw_map2gl, shader_glsl_expp, 0, 0},
496 {D3DSIO_LOGP, "logp", "LOG", 1, 2, vshader_logp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
497 {D3DSIO_LIT, "lit", "LIT", 1, 2, vshader_lit, vshader_hw_map2gl, shader_glsl_lit, 0, 0},
498 {D3DSIO_DST, "dst", "DST", 1, 3, vshader_dst, vshader_hw_map2gl, shader_glsl_dst, 0, 0},
499 {D3DSIO_LRP, "lrp", "LRP", 1, 4, vshader_lrp, NULL, shader_glsl_lrp, 0, 0},
500 {D3DSIO_FRC, "frc", "FRC", 1, 2, vshader_frc, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
501 {D3DSIO_POW, "pow", "POW", 1, 3, vshader_pow, NULL, shader_glsl_map2gl, 0, 0},
502 {D3DSIO_CRS, "crs", "XPS", 1, 3, vshader_crs, NULL, shader_glsl_map2gl, 0, 0},
503 /* TODO: sng can possibly be performed a s
506 {D3DSIO_SGN, "sgn", NULL, 1, 2, vshader_sgn, NULL, shader_glsl_map2gl, 0, 0},
507 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
510 MUL vec.xyz, vec, tmp;
511 but I think this is better because it accounts for w properly.
517 {D3DSIO_NRM, "nrm", NULL, 1, 2, vshader_nrm, NULL, shader_glsl_map2gl, 0, 0},
518 {D3DSIO_SINCOS, "sincos", NULL, 1, 4, vshader_sincos2, NULL, shader_glsl_sincos, D3DVS_VERSION(2,0), D3DVS_VERSION(2,0)},
519 {D3DSIO_SINCOS, "sincos", NULL, 1, 2, vshader_sincos3, NULL, shader_glsl_sincos, D3DVS_VERSION(3,0), -1},
522 {D3DSIO_M4x4, "m4x4", "undefined", 1, 3, vshader_m4x4, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
523 {D3DSIO_M4x3, "m4x3", "undefined", 1, 3, vshader_m4x3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
524 {D3DSIO_M3x4, "m3x4", "undefined", 1, 3, vshader_m3x4, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
525 {D3DSIO_M3x3, "m3x3", "undefined", 1, 3, vshader_m3x3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
526 {D3DSIO_M3x2, "m3x2", "undefined", 1, 3, vshader_m3x2, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
528 /* Declare registers */
529 {D3DSIO_DCL, "dcl", NULL, 0, 2, vshader_dcl, NULL, NULL, 0, 0},
531 /* Constant definitions */
532 {D3DSIO_DEF, "def", NULL, 1, 5, vshader_def, NULL, NULL, 0, 0},
533 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 1, 2, vshader_defb, NULL, NULL, 0, 0},
534 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 1, 5, vshader_defi, NULL, NULL, 0, 0},
536 /* Flow control - requires GLSL or software shaders */
537 {D3DSIO_REP , "rep", NULL, 0, 1, vshader_rep, NULL, shader_glsl_rep, D3DVS_VERSION(2,0), -1},
538 {D3DSIO_ENDREP, "endrep", NULL, 0, 0, vshader_endrep, NULL, shader_glsl_end, D3DVS_VERSION(2,0), -1},
539 {D3DSIO_IF, "if", NULL, 0, 1, vshader_if, NULL, shader_glsl_if, D3DVS_VERSION(2,0), -1},
540 {D3DSIO_IFC, "ifc", NULL, 0, 2, vshader_ifc, NULL, shader_glsl_ifc, D3DVS_VERSION(2,1), -1},
541 {D3DSIO_ELSE, "else", NULL, 0, 0, vshader_else, NULL, shader_glsl_else, D3DVS_VERSION(2,0), -1},
542 {D3DSIO_ENDIF, "endif", NULL, 0, 0, vshader_endif, NULL, shader_glsl_end, D3DVS_VERSION(2,0), -1},
543 {D3DSIO_BREAK, "break", NULL, 0, 0, vshader_break, NULL, shader_glsl_break, D3DVS_VERSION(2,1), -1},
544 {D3DSIO_BREAKC, "breakc", NULL, 0, 2, vshader_breakc, NULL, shader_glsl_breakc, D3DVS_VERSION(2,1), -1},
545 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 0, 1, vshader_breakp, NULL, NULL, 0, 0},
546 {D3DSIO_CALL, "call", NULL, 0, 1, vshader_call, NULL, shader_glsl_call, D3DVS_VERSION(2,0), -1},
547 {D3DSIO_CALLNZ, "callnz", NULL, 0, 2, vshader_callnz, NULL, shader_glsl_callnz, D3DVS_VERSION(2,0), -1},
548 {D3DSIO_LOOP, "loop", NULL, 0, 2, vshader_loop, NULL, shader_glsl_loop, D3DVS_VERSION(2,0), -1},
549 {D3DSIO_RET, "ret", NULL, 0, 0, vshader_ret, NULL, NULL, D3DVS_VERSION(2,0), -1},
550 {D3DSIO_ENDLOOP, "endloop", NULL, 0, 0, vshader_endloop, NULL, shader_glsl_end, D3DVS_VERSION(2,0), -1},
551 {D3DSIO_LABEL, "label", NULL, 0, 1, vshader_label, NULL, shader_glsl_label, D3DVS_VERSION(2,0), -1},
553 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 1, 2, vshader_mova, NULL, shader_glsl_mov, 0, 0},
554 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 1, 3, vshader_setp, NULL, NULL, 0, 0},
555 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 1, 2, vshader_texldl, NULL, NULL, 0, 0},
556 {0, NULL, NULL, 0, 0, NULL, NULL, 0, 0}
559 static void vshader_set_limits(
560 IWineD3DVertexShaderImpl *This) {
562 This->baseShader.limits.texcoord = 0;
563 This->baseShader.limits.attributes = 16;
564 This->baseShader.limits.packed_input = 0;
566 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
567 This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
569 switch (This->baseShader.hex_version) {
570 case D3DVS_VERSION(1,0):
571 case D3DVS_VERSION(1,1):
572 This->baseShader.limits.temporary = 12;
573 This->baseShader.limits.constant_bool = 0;
574 This->baseShader.limits.constant_int = 0;
575 This->baseShader.limits.address = 1;
576 This->baseShader.limits.packed_output = 0;
577 This->baseShader.limits.sampler = 0;
578 This->baseShader.limits.label = 0;
581 case D3DVS_VERSION(2,0):
582 case D3DVS_VERSION(2,1):
583 This->baseShader.limits.temporary = 12;
584 This->baseShader.limits.constant_bool = 16;
585 This->baseShader.limits.constant_int = 16;
586 This->baseShader.limits.address = 1;
587 This->baseShader.limits.packed_output = 0;
588 This->baseShader.limits.sampler = 0;
589 This->baseShader.limits.label = 16;
592 case D3DVS_VERSION(3,0):
593 This->baseShader.limits.temporary = 32;
594 This->baseShader.limits.constant_bool = 32;
595 This->baseShader.limits.constant_int = 32;
596 This->baseShader.limits.address = 1;
597 This->baseShader.limits.packed_output = 12;
598 This->baseShader.limits.sampler = 4;
599 This->baseShader.limits.label = 16; /* FIXME: 2048 */
602 default: This->baseShader.limits.temporary = 12;
603 This->baseShader.limits.constant_bool = 16;
604 This->baseShader.limits.constant_int = 16;
605 This->baseShader.limits.address = 1;
606 This->baseShader.limits.packed_output = 0;
607 This->baseShader.limits.sampler = 0;
608 This->baseShader.limits.label = 16;
609 FIXME("Unrecognized vertex shader version %#lx\n",
610 This->baseShader.hex_version);
614 /* This is an internal function,
615 * used to create fake semantics for shaders
616 * that don't have them - d3d8 shaders where the declaration
617 * stores the register for each input
619 static void vshader_set_input(
620 IWineD3DVertexShaderImpl* This,
622 BYTE usage, BYTE usage_idx) {
624 /* Fake usage: set reserved bit, usage, usage_idx */
625 DWORD usage_token = (0x1 << 31) |
626 (usage << D3DSP_DCL_USAGE_SHIFT) | (usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT);
628 /* Fake register; set reserved bit, regnum, type: input, wmask: all */
629 DWORD reg_token = (0x1 << 31) |
630 D3DSP_WRITEMASK_ALL | (D3DSPR_INPUT << D3DSP_REGTYPE_SHIFT) | regnum;
632 This->semantics_in[regnum].usage = usage_token;
633 This->semantics_in[regnum].reg = reg_token;
636 BOOL vshader_get_input(
637 IWineD3DVertexShader* iface,
638 BYTE usage_req, BYTE usage_idx_req,
639 unsigned int* regnum) {
641 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
644 for (i = 0; i < MAX_ATTRIBS; i++) {
645 DWORD usage_token = This->semantics_in[i].usage;
646 DWORD usage = (usage_token & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
647 DWORD usage_idx = (usage_token & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
649 if (usage_token && (usage == usage_req && usage_idx == usage_idx_req)) {
657 BOOL vshader_input_is_color(
658 IWineD3DVertexShader* iface,
659 unsigned int regnum) {
661 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
662 DWORD usage_token = This->semantics_in[regnum].usage;
663 DWORD usage = (usage_token & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
664 DWORD usage_idx = (usage_token & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
666 IWineD3DVertexDeclarationImpl *vertexDeclaration = NULL;
667 if (This->vertexDeclaration) {
668 /* D3D8 declaration */
669 vertexDeclaration = (IWineD3DVertexDeclarationImpl *)This->vertexDeclaration;
671 /* D3D9 declaration */
672 vertexDeclaration = (IWineD3DVertexDeclarationImpl *)((IWineD3DDeviceImpl *)This->wineD3DDevice)->stateBlock->vertexDecl;
675 if (vertexDeclaration) {
677 /* Find the declaration element that matches our register, then check
678 * if it has D3DCOLOR as its type. This works for both d3d8 and d3d9. */
679 for (i = 0; i < vertexDeclaration->declarationWNumElements-1; ++i) {
680 WINED3DVERTEXELEMENT *element = vertexDeclaration->pDeclarationWine + i;
681 if ((element->Usage == usage && element->UsageIndex == usage_idx)) {
682 return element->Type == WINED3DDECLTYPE_D3DCOLOR;
687 ERR("Either no vertexdeclaration present, or register not matched. This should never happen.\n");
691 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
692 or GLSL and send it to the card */
693 static VOID IWineD3DVertexShaderImpl_GenerateShader(
694 IWineD3DVertexShader *iface,
695 shader_reg_maps* reg_maps,
696 CONST DWORD *pFunction) {
698 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
699 SHADER_BUFFER buffer;
701 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
702 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
703 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
704 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
705 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
706 This->fixupVertexBufferSize = PGMSIZE;
707 This->fixupVertexBuffer[0] = 0;
709 buffer.buffer = This->device->fixupVertexBuffer;
711 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
716 if (This->baseShader.shader_mode == SHADER_GLSL) {
718 /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
719 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
721 /* Base Declarations */
722 shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
724 /* Base Shader Body */
725 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
727 /* Unpack 3.0 outputs */
728 if (This->baseShader.hex_version >= D3DVS_VERSION(3,0))
729 vshader_glsl_output_unpack(&buffer, This->semantics_out);
731 /* Clamp the fog from 0 to 1 if it's used */
734 shader_addline(&buffer, "gl_FogFragCoord = clamp(gl_FogFragCoord, 0.0, 1.0);\n");
737 /* Write the final position.
738 * Account for any inverted textures (render to texture case) by reversing the y coordinate
739 * (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices) */
740 shader_addline(&buffer, "gl_Position.y = gl_Position.y * gl_ProjectionMatrix[1][1];\n");
742 shader_addline(&buffer, "}\n\0");
744 TRACE("Compiling shader object %u\n", shader_obj);
745 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
746 GL_EXTCALL(glCompileShaderARB(shader_obj));
747 print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
749 /* Store the shader object */
750 This->baseShader.prgId = shader_obj;
752 } else if (This->baseShader.shader_mode == SHADER_ARB) {
754 /* Create the hw ARB shader */
755 shader_addline(&buffer, "!!ARBvp1.0\n");
757 /* Mesa supports only 95 constants */
758 if (GL_VEND(MESA) || GL_VEND(WINE))
759 This->baseShader.limits.constant_float =
760 min(95, This->baseShader.limits.constant_float);
762 /* Base Declarations */
763 shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
765 /* We need the projection matrix to correctly render upside-down objects (render to texture) */
766 shader_addline(&buffer, "PARAM PROJECTION = state.matrix.projection.row[1];\n");
770 shader_addline(&buffer, "TEMP TMP_FOG;\n");
773 /* Base Shader Body */
774 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
776 /* Make sure the fog value is positive - values above 1.0 are ignored */
778 shader_addline(&buffer, "MAX result.fogcoord, TMP_FOG, 0.0;\n");
780 /* Write the final position.
781 * Account for any inverted textures (render to texture case) by reversing the y coordinate
782 * (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices) */
783 shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
784 shader_addline(&buffer, "MUL result.position.y, TMP_OUT.y, PROJECTION.y;\n");
786 shader_addline(&buffer, "END\n\0");
788 /* TODO: change to resource.glObjectHandle or something like that */
789 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
791 TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
792 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
794 TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
795 /* Create the program and check for errors */
796 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
797 buffer.bsize, buffer.buffer));
799 if (glGetError() == GL_INVALID_OPERATION) {
801 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
802 FIXME("HW VertexShader Error at position %d: %s\n",
803 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
804 This->baseShader.prgId = -1;
808 #if 1 /* if were using the data buffer of device then we don't need to free it */
809 HeapFree(GetProcessHeap(), 0, buffer.buffer);
/* Hardware (HAL) execution entry point for a vertex shader.
 *
 * Parameters:
 *   iface  - the vertex shader interface
 *   input  - vertex shader input register block
 *   output - vertex shader output register block
 *
 * The only body content visible here is a TODO about using vendor
 * vertex-program extensions, i.e. this path looks like a placeholder.
 * NOTE(review): no statement that reads `input` or writes `output` is
 * visible; confirm callers do not rely on this path producing results. */
813 BOOL IWineD3DVertexShaderImpl_ExecuteHAL(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
815 * TODO: use the NV_vertex_program (or 1_1) extension
816 * and specifics vendors (ARB_vertex_program??) variants for it
/* Software interpreter for a vertex shader.
 *
 * Walks the token stream stored in This->baseShader.function and, for each
 * instruction, resolves its parameter tokens to register pointers and calls
 * the opcode's soft_fct callback with them.
 *
 * Parameters:
 *   iface  - the vertex shader whose bytecode is interpreted
 *   input  - input registers, read as input->V[reg]
 *   output - output registers, written as oPos / oFog / oPts / oD[] / oT[]
 *
 * Working storage:
 *   p[]      - pointer to the register each parameter token names
 *   p_send[] - pointer actually handed to the opcode; may point at the
 *              scratch vectors `d` (destination) or `s[]` (sources) instead
 *              of the real register when a write mask, swizzle or negate
 *              modifier has to be applied
 */
821 HRESULT WINAPI IWineD3DVertexShaderImpl_ExecuteSW(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
822 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
825 /** Vertex Shader Temporary Registers */
826 WINED3DSHADERVECTOR R[12];
827 /*D3DSHADERSCALAR A0;*/
/* NOTE(review): only R is zeroed by the memset below, yet A[0].x is read for
 * relative constant addressing - confirm A is always written before use. */
828 WINED3DSHADERVECTOR A[1];
829 /** temporary Vector for modifier management */
830 WINED3DSHADERVECTOR d;
/* NOTE(review): s[] has 3 slots but is indexed below with the parameter
 * index i (i > 0), while the dispatch switch accepts up to 6 parameters.
 * A 4-parameter opcode would touch s[3] - confirm this cannot overrun. */
831 WINED3DSHADERVECTOR s[3];
833 const DWORD* pToken = This->baseShader.function;
834 const SHADER_OPCODE* curOpcode = NULL;
835 /** functions parameters */
836 WINED3DSHADERVECTOR* p[6];
837 WINED3DSHADERVECTOR* p_send[6];
840 /** init temporary register */
841 memset(R, 0, 12 * sizeof(WINED3DSHADERVECTOR));
843 /* vshader_program_parse(vshader); */
844 #if 0 /* Must not be 1 in cvs */
846 TRACE_VSVECTOR(This->data->C[0]);
847 TRACE_VSVECTOR(This->data->C[1]);
848 TRACE_VSVECTOR(This->data->C[2]);
849 TRACE_VSVECTOR(This->data->C[3]);
850 TRACE_VSVECTOR(This->data->C[4]);
851 TRACE_VSVECTOR(This->data->C[5]);
852 TRACE_VSVECTOR(This->data->C[6]);
853 TRACE_VSVECTOR(This->data->C[7]);
854 TRACE_VSVECTOR(This->data->C[8]);
855 TRACE_VSVECTOR(This->data->C[64]);
856 TRACE_VSVECTOR(input->V[D3DVSDE_POSITION]);
857 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDWEIGHT]);
858 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDINDICES]);
859 TRACE_VSVECTOR(input->V[D3DVSDE_NORMAL]);
860 TRACE_VSVECTOR(input->V[D3DVSDE_PSIZE]);
861 TRACE_VSVECTOR(input->V[D3DVSDE_DIFFUSE]);
862 TRACE_VSVECTOR(input->V[D3DVSDE_SPECULAR]);
863 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD0]);
864 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD1]);
867 TRACE_VSVECTOR(vshader->data->C[64]);
868 /* TODO: Run through all the tokens and find and labels, if, endifs, loops etc...., and make a labels list */
870 /* the first dword is the version tag */
873 if (shader_is_vshader_version(*pToken)) { /** version */
/* Main interpreter loop: runs until the END token is reached. */
876 while (D3DVS_END() != *pToken) {
877 if (shader_is_comment(*pToken)) { /** comment */
/* The comment token encodes the length of its own payload; skip over it. */
878 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
880 pToken += comment_len;
/* Fetch the instruction token and look up its opcode descriptor. */
884 opcode_token = *pToken++;
885 curOpcode = shader_get_opcode((IWineD3DBaseShader*) This, opcode_token);
887 if (NULL == curOpcode) {
888 FIXME("Unrecognized opcode: token=%08lX\n", opcode_token);
889 pToken += shader_skip_unrecognized((IWineD3DBaseShader*) This, pToken);
893 if (curOpcode->num_params > 0) {
894 /* TRACE(">> execting opcode: pos=%d opcode_name=%s token=%08lX\n", pToken - vshader->function, curOpcode->name, *pToken); */
/* Resolve every parameter token to the register it names: the low bits give
 * the register number, shader_get_regtype() gives the register file. */
895 for (i = 0; i < curOpcode->num_params; ++i) {
896 DWORD reg = pToken[i] & D3DSP_REGNUM_MASK;
897 DWORD regtype = shader_get_regtype(pToken[i]);
901 /* TRACE("p[%d]=R[%d]\n", i, reg); */
905 /* TRACE("p[%d]=V[%s]\n", i, VertexShaderDeclRegister[reg]); */
906 p[i] = &input->V[reg];
/* Constant register: with relative addressing the index is offset by a0.x.
 * NOTE(review): A[0].x is a float cast to DWORD and the resulting index is
 * not range-checked here - confirm negative / large offsets are impossible. */
909 if (pToken[i] & D3DVS_ADDRMODE_RELATIVE) {
910 p[i] = &This->data->C[(DWORD) A[0].x + reg];
912 p[i] = &This->data->C[reg];
915 case D3DSPR_ADDR: /* case D3DSPR_TEXTURE: */
917 ERR("cannot handle address registers != a0, forcing use of a0\n");
920 /* TRACE("p[%d]=A[%d]\n", i, reg); */
925 case D3DSRO_POSITION:
926 p[i] = &output->oPos;
929 p[i] = &output->oFog;
931 case D3DSRO_POINT_SIZE:
932 p[i] = &output->oPts;
937 /* TRACE("p[%d]=oD[%d]\n", i, reg); */
938 p[i] = &output->oD[reg];
940 case D3DSPR_TEXCRDOUT:
941 /* TRACE("p[%d]=oT[%d]\n", i, reg); */
942 p[i] = &output->oT[reg];
944 /* TODO Decls and defs */
/* Parameter 0 is the destination; every later parameter is a source that
 * may carry a swizzle and/or a negate modifier. */
953 if (i > 0) { /* input reg */
954 DWORD swizzle = (pToken[i] & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
955 UINT isNegative = ((pToken[i] & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG);
957 if (!isNegative && (D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) {
958 /* TRACE("p[%d] not swizzled\n", i); */
/* Each swizzle field is a 2-bit selector (0..3) picking the source component
 * that feeds x, y, z and w respectively. */
961 DWORD swizzle_x = swizzle & 0x03;
962 DWORD swizzle_y = (swizzle >> 2) & 0x03;
963 DWORD swizzle_z = (swizzle >> 4) & 0x03;
964 DWORD swizzle_w = (swizzle >> 6) & 0x03;
965 /* TRACE("p[%d] swizzled\n", i); */
/* Build the swizzled (and optionally negated) copy of the source in s[i];
 * the register is viewed as an array of four floats for component lookup. */
966 float* tt = (float*) p[i];
967 s[i].x = (isNegative) ? -tt[swizzle_x] : tt[swizzle_x];
968 s[i].y = (isNegative) ? -tt[swizzle_y] : tt[swizzle_y];
969 s[i].z = (isNegative) ? -tt[swizzle_z] : tt[swizzle_z];
970 s[i].w = (isNegative) ? -tt[swizzle_w] : tt[swizzle_w];
973 } else { /* output reg */
974 if ((pToken[i] & D3DSP_WRITEMASK_ALL) == D3DSP_WRITEMASK_ALL) {
977 p_send[i] = &d; /* to be post-processed for modifiers management */
/* Dispatch on arity: soft_fct receives exactly num_params vector pointers. */
983 switch (curOpcode->num_params) {
985 curOpcode->soft_fct();
988 curOpcode->soft_fct(p_send[0]);
991 curOpcode->soft_fct(p_send[0], p_send[1]);
994 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2]);
997 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3]);
1000 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4]);
1003 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4], p_send[5]);
1006 ERR("%s too many params: %u\n", curOpcode->name, curOpcode->num_params);
1009 /* check if output reg modifier post-process */
/* With a partial write mask the opcode wrote into the scratch vector `d`;
 * copy only the components enabled in the mask to the real destination. */
1010 if (curOpcode->num_params > 0 && (pToken[0] & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1011 if (pToken[0] & D3DSP_WRITEMASK_0) p[0]->x = d.x;
1012 if (pToken[0] & D3DSP_WRITEMASK_1) p[0]->y = d.y;
1013 if (pToken[0] & D3DSP_WRITEMASK_2) p[0]->z = d.z;
1014 if (pToken[0] & D3DSP_WRITEMASK_3) p[0]->w = d.w;
1017 TRACE_VSVECTOR(output->oPos);
1018 TRACE_VSVECTOR(output->oD[0]);
1019 TRACE_VSVECTOR(output->oD[1]);
1020 TRACE_VSVECTOR(output->oT[0]);
1021 TRACE_VSVECTOR(output->oT[1]);
1022 TRACE_VSVECTOR(R[0]);
1023 TRACE_VSVECTOR(R[1]);
1024 TRACE_VSVECTOR(R[2]);
1025 TRACE_VSVECTOR(R[3]);
1026 TRACE_VSVECTOR(R[4]);
1027 TRACE_VSVECTOR(R[5]);
1030 /* to next opcode token */
1031 pToken += curOpcode->num_params;
1034 TRACE("End of current instruction:\n");
1035 TRACE_VSVECTOR(output->oPos);
1036 TRACE_VSVECTOR(output->oD[0]);
1037 TRACE_VSVECTOR(output->oD[1]);
1038 TRACE_VSVECTOR(output->oT[0]);
1039 TRACE_VSVECTOR(output->oT[1]);
1040 TRACE_VSVECTOR(R[0]);
1041 TRACE_VSVECTOR(R[1]);
1042 TRACE_VSVECTOR(R[2]);
1043 TRACE_VSVECTOR(R[3]);
1044 TRACE_VSVECTOR(R[4]);
1045 TRACE_VSVECTOR(R[5]);
1048 #if 0 /* Must not be 1 in cvs */
1050 TRACE_VSVECTOR(output->oPos);
1051 TRACE_VSVECTOR(output->oD[0]);
1052 TRACE_VSVECTOR(output->oD[1]);
1053 TRACE_VSVECTOR(output->oT[0]);
1054 TRACE_VSVECTOR(output->oT[1]);
1059 /* *******************************************
1060 IWineD3DVertexShader IUnknown parts follow
1061 ******************************************* */
/* IUnknown::QueryInterface for the vertex shader object.
 *
 * Parameters:
 *   iface - the vertex shader interface
 *   riid  - identifier of the requested interface
 *   ppobj - receives the interface pointer
 *
 * The object answers to IID_IUnknown, IID_IWineD3DBase,
 * IID_IWineD3DBaseShader and IID_IWineD3DVertexShader; for any of those a
 * reference is added before the pointer is handed out. Any request that does
 * not leave through that branch ends in E_NOINTERFACE. */
1062 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
1064 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1065 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
1066 if (IsEqualGUID(riid, &IID_IUnknown)
1067 || IsEqualGUID(riid, &IID_IWineD3DBase)
1068 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
1069 || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
1070 IUnknown_AddRef(iface);
1075 return E_NOINTERFACE;
/* IUnknown::AddRef: atomically increments the object's reference count and
 * returns the value produced by InterlockedIncrement. The TRACE reports the
 * count as it was before the increment. */
1078 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
1079 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1080 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
1081 return InterlockedIncrement(&This->ref);
/* IUnknown::Release: atomically decrements the reference count and tears the
 * object down.
 *
 * Teardown steps visible here:
 *   - release the attached vertex declaration, if any
 *   - in GLSL mode with a non-zero prgId, delete the GL shader object
 *   - free the float / bool / int immediate-constant lists
 *   - free the shader object itself
 *
 * NOTE(review): presumably the teardown runs only when the count reaches
 * zero - confirm the guard around it.
 * NOTE(review): no deletion of an ARB program id (SHADER_ARB mode) and no
 * free of This->baseShader.function is visible here - confirm both are
 * released elsewhere. */
1084 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
1085 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1087 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
1088 ref = InterlockedDecrement(&This->ref);
1090 if (This->vertexDeclaration) IWineD3DVertexDeclaration_Release(This->vertexDeclaration);
1091 if (This->baseShader.shader_mode == SHADER_GLSL && This->baseShader.prgId != 0) {
1092 /* If this shader is still attached to a program, GL will perform a lazy delete */
1093 TRACE("Deleting shader object %u\n", This->baseShader.prgId);
1094 GL_EXTCALL(glDeleteObjectARB(This->baseShader.prgId));
1095 checkGLcall("glDeleteObjectARB");
1097 shader_delete_constant_list(&This->baseShader.constantsF);
1098 shader_delete_constant_list(&This->baseShader.constantsB);
1099 shader_delete_constant_list(&This->baseShader.constantsI);
1100 HeapFree(GetProcessHeap(), 0, This);
1106 /* *******************************************
1107 IWineD3DVertexShader IWineD3DVertexShader parts follow
1108 ******************************************* */
/* Returns the shader's parent object.
 *
 * Parameters:
 *   iface  - the vertex shader interface
 *   parent - receives This->parent
 *
 * A reference is added to the parent before returning, so the caller owns
 * one reference and has to release it.
 * NOTE(review): AddRef is called on *parent unconditionally - confirm the
 * parent can never be NULL. */
1110 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
1111 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1113 *parent = This->parent;
1114 IUnknown_AddRef(*parent);
1115 TRACE("(%p) : returning %p\n", This, *parent);
/* Returns the device this shader belongs to.
 *
 * Parameters:
 *   iface   - the vertex shader interface
 *   pDevice - receives This->wineD3DDevice
 *
 * A reference is added to the device before it is stored in *pDevice, so the
 * caller owns one reference and has to release it. */
1119 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
1120 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1121 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
1122 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
1123 TRACE("(%p) returning %p\n", This, *pDevice);
/* Copies the stored shader bytecode out to the caller.
 *
 * Parameters:
 *   impl        - the vertex shader interface
 *   pData       - destination buffer, or NULL to query the required size
 *   pSizeOfData - in: size of pData in bytes; out: required size when pData
 *                 is NULL or the buffer is too small
 *
 * Behaviour:
 *   - pData == NULL: only *pSizeOfData is set to the bytecode length
 *   - buffer smaller than the bytecode: *pSizeOfData is set to the needed
 *     length and WINED3DERR_MOREDATA is returned
 *   - otherwise functionLength bytes are copied into pData
 *
 * NOTE(review): pSizeOfData is dereferenced without a NULL check. */
1127 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
1128 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
1129 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
1131 if (NULL == pData) {
1132 *pSizeOfData = This->baseShader.functionLength;
1135 if (*pSizeOfData < This->baseShader.functionLength) {
1136 *pSizeOfData = This->baseShader.functionLength;
1137 return WINED3DERR_MOREDATA;
/* NOTE(review): with no stored function a pointer-sized NULL is written into
 * the caller's buffer, but the size check above only guarantees
 * functionLength bytes - confirm the buffer is at least sizeof(DWORD *). */
1139 if (NULL == This->baseShader.function) { /* no function defined */
1140 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
1141 (*(DWORD **) pData) = NULL;
/* NOTE(review): no statement is visible inside this zero-length check; it
 * looks like an empty placeholder - confirm and remove if so. */
1143 if(This->baseShader.functionLength == 0){
1146 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
1147 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
1152 /* Note that for vertex shaders CompileShader isn't called until the
1153 * shader is first used. The reason for this is that we need the vertex
1154 * declaration the shader will be used with in order to determine if
1155 * the data in a register is of type D3DCOLOR, and needs swizzling. */
/* Stores and analyses the shader bytecode; no GL program is built here.
 *
 * Parameters:
 *   iface     - the vertex shader interface
 *   pFunction - shader token stream supplied by the application, may be NULL
 *
 * Steps, in order:
 *   1. trace the bytecode and set the per-version limits
 *   2. initialise the three immediate-constant lists
 *   3. if a vertex declaration is attached, preload the input semantics
 *      from its elements
 *   4. clear reg_maps and collect register / semantic usage; a failure of
 *      that pass is returned to the caller unchanged
 *   5. record the selected vertex shader mode
 *   6. keep a private heap copy of the bytecode (E_OUTOFMEMORY if the
 *      allocation fails), or store NULL when pFunction is NULL
 */
1156 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
1158 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
1160 shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
1162 TRACE("(%p) : pFunction %p\n", iface, pFunction);
1164 /* First pass: trace shader */
1165 shader_trace_init((IWineD3DBaseShader*) This, pFunction);
1166 vshader_set_limits(This);
1168 /* Initialize immediate constant lists */
1169 list_init(&This->baseShader.constantsF);
1170 list_init(&This->baseShader.constantsB);
1171 list_init(&This->baseShader.constantsI);
1173 /* Preload semantics for d3d8 shaders */
1174 if (This->vertexDeclaration) {
1175 IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*) This->vertexDeclaration;
/* The last declaration element is skipped (presumably the end marker -
 * confirm). NOTE(review): if declarationWNumElements is unsigned and 0, the
 * "- 1" wraps around and the loop runs far past the array. */
1177 for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
1178 WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
1179 vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
1183 /* Second pass: figure out registers used, semantics, etc.. */
1184 memset(reg_maps, 0, sizeof(shader_reg_maps));
1185 hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
1186 This->semantics_in, This->semantics_out, pFunction);
1187 if (hr != WINED3D_OK) return hr;
1189 This->baseShader.shader_mode = wined3d_settings.vs_selected_mode;
1191 /* copy the function ... because it will certainly be released by application */
/* functionLength is read here without being set in this function; presumably
 * shader_trace_init() above computed it - confirm.
 * NOTE(review): a previously stored This->baseShader.function is overwritten
 * without being freed - confirm SetFunction is called only once per object. */
1192 if (NULL != pFunction) {
1193 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1194 if (!This->baseShader.function) return E_OUTOFMEMORY;
1195 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1197 This->baseShader.function = NULL;
/* Builds the hardware program for this shader on first use.
 *
 * Parameters:
 *   iface - the vertex shader interface
 *
 * Behaviour:
 *   - already compiled (is_compiled set): returns WINED3D_OK immediately
 *   - no stored bytecode, or software mode (SHADER_SW): nothing is generated;
 *     the shader is only marked as compiled
 *   - otherwise IWineD3DVertexShaderImpl_GenerateShader() is run with the
 *     register maps collected earlier, then the shader is marked as compiled
 */
1203 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
1204 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1205 CONST DWORD *function = This->baseShader.function;
1207 TRACE("(%p) : function %p\n", iface, function);
1209 /* We're already compiled. */
1210 if (This->baseShader.is_compiled) return WINED3D_OK;
1212 /* We don't need to compile */
1213 if (!function || This->baseShader.shader_mode == SHADER_SW) {
1214 This->baseShader.is_compiled = TRUE;
1218 /* Generate the HW shader */
1219 TRACE("(%p) : Generating hardware program\n", This);
1220 IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
1222 This->baseShader.is_compiled = TRUE;
/* Method table for the vertex shader object. Entries are grouped by the
 * interface that introduces them: IUnknown, IWineD3DBase,
 * IWineD3DBaseShader, then IWineD3DVertexShader. The order is positional, so
 * it presumably has to match the method order of the interface declaration -
 * confirm against the header before reordering. */
1227 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
1229 /*** IUnknown methods ***/
1230 IWineD3DVertexShaderImpl_QueryInterface,
1231 IWineD3DVertexShaderImpl_AddRef,
1232 IWineD3DVertexShaderImpl_Release,
1233 /*** IWineD3DBase methods ***/
1234 IWineD3DVertexShaderImpl_GetParent,
1235 /*** IWineD3DBaseShader methods ***/
1236 IWineD3DVertexShaderImpl_SetFunction,
1237 IWineD3DVertexShaderImpl_CompileShader,
1238 /*** IWineD3DVertexShader methods ***/
1239 IWineD3DVertexShaderImpl_GetDevice,
1240 IWineD3DVertexShaderImpl_GetFunction