2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2004 Christian Costa
7 * Copyright 2005 Oliver Stieber
8 * Copyright 2006 Ivan Gyurdiev
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "wined3d_private.h"
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
36 /* Shader debugging - Change the following line to enable debugging of software
38 #if 0 /* Must not be 1 in cvs version */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
43 # define TRACE_VSVECTOR(name)
47 * DirectX9 SDK download
48 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
51 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
53 * Using Vertex Shaders
54 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
57 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
60 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
66 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
69 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
71 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72 * http://developer.nvidia.com/view.asp?IO=vstovp
74 * NVIDIA: Memory Management with VAR
75 * http://developer.nvidia.com/view.asp?IO=var_memory_management
78 /* TODO: Vertex and Pixel shaders are almost identical, the only exception being the way that some of the data is looked up or the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders;
79 because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross compiled using a large body of shared code */
81 #define GLNAME_REQUIRE_GLSL ((const char *)1)
83 /*******************************
84 * vshader functions software VM
87 static void vshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
92 VSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
93 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
96 static void vshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
97 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
98 VSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
99 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
102 static void vshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
103 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
104 VSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
105 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
108 static void vshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
110 d->y = s0->y * s1->y;
113 VSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
114 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
117 static void vshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
123 tmp.f = floorf(s0->w);
124 d->x = powf(2.0f, tmp.f);
125 d->y = s0->w - tmp.f;
126 tmp.f = powf(2.0f, s0->w);
127 tmp.d &= 0xFFFFFF00U;
130 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
131 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
134 static void vshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
136 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
137 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
139 VSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
140 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
143 static void vshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
144 float tmp_f = fabsf(s0->w);
145 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
146 VSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
147 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
150 static void vshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
151 d->x = s0->x * s1->x + s2->x;
152 d->y = s0->y * s1->y + s2->y;
153 d->z = s0->z * s1->z + s2->z;
154 d->w = s0->w * s1->w + s2->w;
155 VSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
156 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
159 static void vshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
160 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
161 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
162 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
163 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
164 VSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
165 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
168 static void vshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
169 d->x = (s0->x < s1->x) ? s0->x : s1->x;
170 d->y = (s0->y < s1->y) ? s0->y : s1->y;
171 d->z = (s0->z < s1->z) ? s0->z : s1->z;
172 d->w = (s0->w < s1->w) ? s0->w : s1->w;
173 VSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
174 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
177 static void vshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
182 VSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
183 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
186 static void vshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
187 d->x = s0->x * s1->x;
188 d->y = s0->y * s1->y;
189 d->z = s0->z * s1->z;
190 d->w = s0->w * s1->w;
191 VSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
192 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
195 static void vshader_nop(void) {
196 /* NOPPPP ahhh too easy ;) */
197 VSTRACE(("executing nop\n"));
200 static void vshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
201 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
202 VSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
203 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
206 static void vshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
207 float tmp_f = fabsf(s0->w);
208 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
209 VSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
210 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
213 static void vshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
214 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
215 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
216 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
217 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
218 VSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
219 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
222 static void vshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
223 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
224 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
225 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
226 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
227 VSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
228 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
231 static void vshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
232 d->x = s0->x - s1->x;
233 d->y = s0->y - s1->y;
234 d->z = s0->z - s1->z;
235 d->w = s0->w - s1->w;
236 VSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
237 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
241 * Version 1.1 specific
244 static void vshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
245 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
246 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
247 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
250 static void vshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
251 float tmp_f = fabsf(s0->w);
252 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
253 VSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
254 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
257 static void vshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
258 d->x = s0->x - floorf(s0->x);
259 d->y = s0->y - floorf(s0->y);
262 VSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
263 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
266 typedef FLOAT D3DMATRIX44[4][4];
267 typedef FLOAT D3DMATRIX43[4][3];
268 typedef FLOAT D3DMATRIX34[3][4];
269 typedef FLOAT D3DMATRIX33[3][3];
270 typedef FLOAT D3DMATRIX23[2][3];
272 static void vshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
274 * Buggy CODE: kept here only for copy/paste in case the cast does not work
275 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
276 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
277 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
278 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
279 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
280 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
281 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
283 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
284 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
285 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
286 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
287 VSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
288 VSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
289 VSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
290 VSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
293 static void vshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
294 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
295 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
296 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
298 VSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
299 VSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
300 VSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
301 VSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
304 static void vshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
305 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
306 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
307 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
308 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
309 VSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
310 VSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
311 VSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
312 VSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
315 static void vshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
316 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
317 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
318 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
320 VSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
321 VSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
322 VSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
323 VSTRACE(("executing m3x3(4): (%f) \n", d->w));
326 static void vshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
328 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
329 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
335 * Version 2.0 specific
337 static void vshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
338 d->x = s0->x * (s1->x - s2->x) + s2->x;
339 d->y = s0->y * (s1->y - s2->y) + s2->y;
340 d->z = s0->z * (s1->z - s2->z) + s2->z;
341 d->w = s0->w * (s1->w - s2->w) + s2->w;
344 static void vshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
345 d->x = s0->y * s1->z - s0->z * s1->y;
346 d->y = s0->z * s1->x - s0->x * s1->z;
347 d->z = s0->x * s1->y - s0->y * s1->x;
348 d->w = 0.9f; /* w is undefined, so set it to something safeish */
350 VSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
351 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
354 static void vshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
360 VSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
361 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
366 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
367 static void vshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
371 static void vshader_call(WINED3DSHADERVECTOR* d) {
375 static void vshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
379 static void vshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
383 static void vshader_ret(void) {
387 static void vshader_endloop(void) {
391 static void vshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
395 static void vshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
399 static void vshader_sgn(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
403 static void vshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
407 static void vshader_sincos3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
411 static void vshader_sincos2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
415 static void vshader_rep(WINED3DSHADERVECTOR* d) {
419 static void vshader_endrep(void) {
423 static void vshader_if(WINED3DSHADERVECTOR* d) {
427 static void vshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
431 static void vshader_else(void) {
435 static void vshader_label(WINED3DSHADERVECTOR* d) {
439 static void vshader_endif(void) {
443 static void vshader_break(void) {
447 static void vshader_breakc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
451 static void vshader_breakp(WINED3DSHADERVECTOR* d) {
455 static void vshader_mova(WINED3DSHADERVECTOR* d) {
459 static void vshader_defb(WINED3DSHADERVECTOR* d) {
463 static void vshader_defi(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
467 static void vshader_setp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
471 static void vshader_texldl(WINED3DSHADERVECTOR* d) {
476 * log, exp, frc, m*x* seem to be macro instructions ... to be checked
478 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
481 {D3DSIO_NOP, "nop", "NOP", 0, 0, vshader_nop, vshader_hw_map2gl, NULL, 0, 0},
482 {D3DSIO_MOV, "mov", "MOV", 1, 2, vshader_mov, vshader_hw_map2gl, shader_glsl_mov, 0, 0},
483 {D3DSIO_ADD, "add", "ADD", 1, 3, vshader_add, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
484 {D3DSIO_SUB, "sub", "SUB", 1, 3, vshader_sub, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
485 {D3DSIO_MAD, "mad", "MAD", 1, 4, vshader_mad, vshader_hw_map2gl, shader_glsl_mad, 0, 0},
486 {D3DSIO_MUL, "mul", "MUL", 1, 3, vshader_mul, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
487 {D3DSIO_RCP, "rcp", "RCP", 1, 2, vshader_rcp, vshader_hw_map2gl, shader_glsl_rcp, 0, 0},
488 {D3DSIO_RSQ, "rsq", "RSQ", 1, 2, vshader_rsq, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
489 {D3DSIO_DP3, "dp3", "DP3", 1, 3, vshader_dp3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
490 {D3DSIO_DP4, "dp4", "DP4", 1, 3, vshader_dp4, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
491 {D3DSIO_MIN, "min", "MIN", 1, 3, vshader_min, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
492 {D3DSIO_MAX, "max", "MAX", 1, 3, vshader_max, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
493 {D3DSIO_SLT, "slt", "SLT", 1, 3, vshader_slt, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
494 {D3DSIO_SGE, "sge", "SGE", 1, 3, vshader_sge, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
495 {D3DSIO_ABS, "abs", "ABS", 1, 2, vshader_abs, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
496 {D3DSIO_EXP, "exp", "EX2", 1, 2, vshader_exp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
497 {D3DSIO_LOG, "log", "LG2", 1, 2, vshader_log, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
498 {D3DSIO_EXPP, "expp", "EXP", 1, 2, vshader_expp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
499 {D3DSIO_LOGP, "logp", "LOG", 1, 2, vshader_logp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
500 {D3DSIO_LIT, "lit", "LIT", 1, 2, vshader_lit, vshader_hw_map2gl, shader_glsl_lit, 0, 0},
501 {D3DSIO_DST, "dst", "DST", 1, 3, vshader_dst, vshader_hw_map2gl, shader_glsl_dst, 0, 0},
502 {D3DSIO_LRP, "lrp", "LRP", 1, 4, vshader_lrp, NULL, shader_glsl_lrp, 0, 0},
503 {D3DSIO_FRC, "frc", "FRC", 1, 2, vshader_frc, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
504 {D3DSIO_POW, "pow", "POW", 1, 3, vshader_pow, NULL, shader_glsl_map2gl, 0, 0},
505 {D3DSIO_CRS, "crs", "XPS", 1, 3, vshader_crs, NULL, shader_glsl_map2gl, 0, 0},
506 /* TODO: sgn can possibly be performed as
509 {D3DSIO_SGN, "sgn", NULL, 1, 2, vshader_sgn, NULL, shader_glsl_map2gl, 0, 0},
510 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
513 MUL vec.xyz, vec, tmp;
514 but I think this is better because it accounts for w properly.
520 {D3DSIO_NRM, "nrm", NULL, 1, 2, vshader_nrm, NULL, shader_glsl_map2gl, 0, 0},
521 {D3DSIO_SINCOS, "sincos", NULL, 1, 4, vshader_sincos2, NULL, shader_glsl_sincos, D3DVS_VERSION(2,0), D3DVS_VERSION(2,0)},
522 {D3DSIO_SINCOS, "sincos", NULL, 1, 2, vshader_sincos3, NULL, shader_glsl_sincos, D3DVS_VERSION(3,0), -1},
525 {D3DSIO_M4x4, "m4x4", "undefined", 1, 3, vshader_m4x4, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
526 {D3DSIO_M4x3, "m4x3", "undefined", 1, 3, vshader_m4x3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
527 {D3DSIO_M3x4, "m3x4", "undefined", 1, 3, vshader_m3x4, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
528 {D3DSIO_M3x3, "m3x3", "undefined", 1, 3, vshader_m3x3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
529 {D3DSIO_M3x2, "m3x2", "undefined", 1, 3, vshader_m3x2, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
531 /* Declare registers */
532 {D3DSIO_DCL, "dcl", NULL, 0, 2, vshader_dcl, NULL, NULL, 0, 0},
534 /* Constant definitions */
535 {D3DSIO_DEF, "def", NULL, 1, 5, vshader_def, NULL, NULL, 0, 0},
536 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 1, 2, vshader_defb, NULL, NULL, 0, 0},
537 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 1, 5, vshader_defi, NULL, NULL, 0, 0},
539 /* Flow control - requires GLSL or software shaders */
540 {D3DSIO_REP , "rep", NULL, 0, 1, vshader_rep, NULL, shader_glsl_rep, D3DVS_VERSION(2,0), -1},
541 {D3DSIO_ENDREP, "endrep", NULL, 0, 0, vshader_endrep, NULL, shader_glsl_end, D3DVS_VERSION(2,0), -1},
542 {D3DSIO_IF, "if", NULL, 0, 1, vshader_if, NULL, shader_glsl_if, D3DVS_VERSION(2,0), -1},
543 {D3DSIO_IFC, "ifc", NULL, 0, 2, vshader_ifc, NULL, shader_glsl_ifc, D3DVS_VERSION(2,1), -1},
544 {D3DSIO_ELSE, "else", NULL, 0, 0, vshader_else, NULL, shader_glsl_else, D3DVS_VERSION(2,0), -1},
545 {D3DSIO_ENDIF, "endif", NULL, 0, 0, vshader_endif, NULL, shader_glsl_end, D3DVS_VERSION(2,0), -1},
546 {D3DSIO_BREAK, "break", NULL, 0, 0, vshader_break, NULL, shader_glsl_break, D3DVS_VERSION(2,1), -1},
547 {D3DSIO_BREAKC, "breakc", NULL, 0, 2, vshader_breakc, NULL, shader_glsl_breakc, D3DVS_VERSION(2,1), -1},
548 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 0, 1, vshader_breakp, NULL, NULL, 0, 0},
549 {D3DSIO_CALL, "call", NULL, 0, 1, vshader_call, NULL, shader_glsl_call, D3DVS_VERSION(2,0), -1},
550 {D3DSIO_CALLNZ, "callnz", NULL, 0, 2, vshader_callnz, NULL, shader_glsl_callnz, D3DVS_VERSION(2,0), -1},
551 {D3DSIO_LOOP, "loop", NULL, 0, 2, vshader_loop, NULL, shader_glsl_loop, D3DVS_VERSION(2,0), -1},
552 {D3DSIO_RET, "ret", NULL, 0, 0, vshader_ret, NULL, NULL, D3DVS_VERSION(2,0), -1},
553 {D3DSIO_ENDLOOP, "endloop", NULL, 0, 0, vshader_endloop, NULL, shader_glsl_end, D3DVS_VERSION(2,0), -1},
554 {D3DSIO_LABEL, "label", NULL, 0, 1, vshader_label, NULL, shader_glsl_label, D3DVS_VERSION(2,0), -1},
556 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 1, 2, vshader_mova, NULL, shader_glsl_mov, 0, 0},
557 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 1, 3, vshader_setp, NULL, NULL, 0, 0},
558 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 1, 2, vshader_texldl, NULL, NULL, 0, 0},
559 {0, NULL, NULL, 0, 0, NULL, NULL, 0, 0}
562 static void vshader_set_limits(
563 IWineD3DVertexShaderImpl *This) {
565 This->baseShader.limits.texcoord = 0;
566 This->baseShader.limits.attributes = 16;
567 This->baseShader.limits.packed_input = 0;
569 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
570 This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
572 switch (This->baseShader.hex_version) {
573 case D3DVS_VERSION(1,0):
574 case D3DVS_VERSION(1,1):
575 This->baseShader.limits.temporary = 12;
576 This->baseShader.limits.constant_bool = 0;
577 This->baseShader.limits.constant_int = 0;
578 This->baseShader.limits.address = 1;
579 This->baseShader.limits.packed_output = 0;
580 This->baseShader.limits.sampler = 0;
581 This->baseShader.limits.label = 0;
584 case D3DVS_VERSION(2,0):
585 case D3DVS_VERSION(2,1):
586 This->baseShader.limits.temporary = 12;
587 This->baseShader.limits.constant_bool = 16;
588 This->baseShader.limits.constant_int = 16;
589 This->baseShader.limits.address = 1;
590 This->baseShader.limits.packed_output = 0;
591 This->baseShader.limits.sampler = 0;
592 This->baseShader.limits.label = 16;
595 case D3DVS_VERSION(3,0):
596 This->baseShader.limits.temporary = 32;
597 This->baseShader.limits.constant_bool = 32;
598 This->baseShader.limits.constant_int = 32;
599 This->baseShader.limits.address = 1;
600 This->baseShader.limits.packed_output = 12;
601 This->baseShader.limits.sampler = 4;
602 This->baseShader.limits.label = 16; /* FIXME: 2048 */
605 default: This->baseShader.limits.temporary = 12;
606 This->baseShader.limits.constant_bool = 16;
607 This->baseShader.limits.constant_int = 16;
608 This->baseShader.limits.address = 1;
609 This->baseShader.limits.packed_output = 0;
610 This->baseShader.limits.sampler = 0;
611 This->baseShader.limits.label = 16;
612 FIXME("Unrecognized vertex shader version %#lx\n",
613 This->baseShader.hex_version);
617 /* This is an internal function,
618 * used to create fake semantics for shaders
619 * that don't have them - d3d8 shaders where the declaration
620 * stores the register for each input
622 static void vshader_set_input(
623 IWineD3DVertexShaderImpl* This,
625 BYTE usage, BYTE usage_idx) {
627 /* Fake usage: set reserved bit, usage, usage_idx */
628 DWORD usage_token = (0x1 << 31) |
629 (usage << D3DSP_DCL_USAGE_SHIFT) | (usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT);
631 /* Fake register; set reserved bit, regnum, type: input, wmask: all */
632 DWORD reg_token = (0x1 << 31) |
633 D3DSP_WRITEMASK_ALL | (D3DSPR_INPUT << D3DSP_REGTYPE_SHIFT) | regnum;
635 This->semantics_in[regnum].usage = usage_token;
636 This->semantics_in[regnum].reg = reg_token;
639 BOOL vshader_get_input(
640 IWineD3DVertexShader* iface,
641 BYTE usage_req, BYTE usage_idx_req,
642 unsigned int* regnum) {
644 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
647 for (i = 0; i < MAX_ATTRIBS; i++) {
648 DWORD usage_token = This->semantics_in[i].usage;
649 DWORD usage = (usage_token & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
650 DWORD usage_idx = (usage_token & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
652 if (usage_token && (usage == usage_req && usage_idx == usage_idx_req)) {
660 BOOL vshader_input_is_color(
661 IWineD3DVertexShader* iface,
662 unsigned int regnum) {
664 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
665 DWORD usage_token = This->semantics_in[regnum].usage;
666 DWORD usage = (usage_token & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
668 /* FIXME: D3D8 shader: the semantics token is not the way to
669 * determine color info, since it is just a fake map to shader inputs */
670 if (This->vertexDeclaration != NULL)
673 return usage == D3DDECLUSAGE_COLOR;
676 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
677 or GLSL and send it to the card */
678 static VOID IWineD3DVertexShaderImpl_GenerateShader(
679 IWineD3DVertexShader *iface,
680 shader_reg_maps* reg_maps,
681 CONST DWORD *pFunction) {
683 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
684 SHADER_BUFFER buffer;
686 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
687 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
688 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
689 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
690 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
691 This->fixupVertexBufferSize = PGMSIZE;
692 This->fixupVertexBuffer[0] = 0;
694 buffer.buffer = This->device->fixupVertexBuffer;
696 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
701 if (This->baseShader.shader_mode == SHADER_GLSL) {
703 /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
704 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
706 /* Base Declarations */
707 shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
709 /* Base Shader Body */
710 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
712 /* Unpack 3.0 outputs */
713 if (This->baseShader.hex_version >= D3DVS_VERSION(3,0))
714 vshader_glsl_output_unpack(&buffer, This->semantics_out);
716 /* Clamp the fog from 0 to 1 if it's used */
719 shader_addline(&buffer, "gl_FogFragCoord = clamp(gl_FogFragCoord, 0.0, 1.0);\n");
722 shader_addline(&buffer, "}\n\0");
724 TRACE("Compiling shader object %u\n", shader_obj);
725 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
726 GL_EXTCALL(glCompileShaderARB(shader_obj));
727 print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
729 /* Store the shader object */
730 This->baseShader.prgId = shader_obj;
732 } else if (This->baseShader.shader_mode == SHADER_ARB) {
734 /* Create the hw ARB shader */
735 shader_addline(&buffer, "!!ARBvp1.0\n");
737 /* Mesa supports only 95 constants */
738 if (GL_VEND(MESA) || GL_VEND(WINE))
739 This->baseShader.limits.constant_float =
740 min(95, This->baseShader.limits.constant_float);
742 /* Base Declarations */
743 shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
747 shader_addline(&buffer, "TEMP TMP_FOG;\n");
750 /* Base Shader Body */
751 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
753 /* Make sure the fog value is positive - values above 1.0 are ignored */
755 shader_addline(&buffer, "MAX result.fogcoord, TMP_FOG, 0.0;\n");
757 shader_addline(&buffer, "END\n\0");
759 /* TODO: change to resource.glObjectHandle or something like that */
760 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
762 TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
763 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
765 TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
766 /* Create the program and check for errors */
767 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
768 buffer.bsize, buffer.buffer));
770 if (glGetError() == GL_INVALID_OPERATION) {
772 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
773 FIXME("HW VertexShader Error at position %d: %s\n",
774 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
775 This->baseShader.prgId = -1;
779 #if 1 /* if we're using the data buffer of the device then we don't need to free it */
780 HeapFree(GetProcessHeap(), 0, buffer.buffer);
784 BOOL IWineD3DVertexShaderImpl_ExecuteHAL(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
786 * TODO: use the NV_vertex_program (or 1_1) extension
787 * and vendor-specific (ARB_vertex_program??) variants of it
/* Software interpreter for a vertex shader.
 * Walks the token stream stored in This->baseShader.function, resolves every
 * parameter token of an opcode to a register, and then calls the opcode's
 * soft_fct handler on those registers.
 *   iface  - the vertex shader to run
 *   input  - input data; input registers are read through input->V[]
 *   output - output data; written through oPos, oFog, oPts, oD[] and oT[]
 * NOTE(review): the return statements are not visible here - confirm the HRESULT values. */
792 HRESULT WINAPI IWineD3DVertexShaderImpl_ExecuteSW(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
793 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
796 /** Vertex Shader Temporary Registers */
797 WINED3DSHADERVECTOR R[12];
798 /*D3DSHADERSCALAR A0;*/
799 WINED3DSHADERVECTOR A[1];
800 /** temporary Vector for modifier management */
/* d is handed to the opcode as destination (p_send[i] = &d below) and its
 * components are copied back under the write mask after the opcode has run.
 * s[] receives swizzled and/or negated copies of source operands.
 * NOTE(review): s has 3 entries but is indexed by the parameter position i
 * (i > 0) below, so a third source operand would use s[3] - verify the bound. */
801 WINED3DSHADERVECTOR d;
802 WINED3DSHADERVECTOR s[3];
804 const DWORD* pToken = This->baseShader.function;
805 const SHADER_OPCODE* curOpcode = NULL;
806 /** functions parameters */
/* p[i] is the register resolved for parameter i; p_send[i] is the pointer that
 * is actually passed to the opcode handler. */
807 WINED3DSHADERVECTOR* p[6];
808 WINED3DSHADERVECTOR* p_send[6];
811 /** init temporary register */
812 memset(R, 0, 12 * sizeof(WINED3DSHADERVECTOR));
814 /* vshader_program_parse(vshader); */
815 #if 0 /* Must not be 1 in cvs */
817 TRACE_VSVECTOR(This->data->C[0]);
818 TRACE_VSVECTOR(This->data->C[1]);
819 TRACE_VSVECTOR(This->data->C[2]);
820 TRACE_VSVECTOR(This->data->C[3]);
821 TRACE_VSVECTOR(This->data->C[4]);
822 TRACE_VSVECTOR(This->data->C[5]);
823 TRACE_VSVECTOR(This->data->C[6]);
824 TRACE_VSVECTOR(This->data->C[7]);
825 TRACE_VSVECTOR(This->data->C[8]);
826 TRACE_VSVECTOR(This->data->C[64]);
827 TRACE_VSVECTOR(input->V[D3DVSDE_POSITION]);
828 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDWEIGHT]);
829 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDINDICES]);
830 TRACE_VSVECTOR(input->V[D3DVSDE_NORMAL]);
831 TRACE_VSVECTOR(input->V[D3DVSDE_PSIZE]);
832 TRACE_VSVECTOR(input->V[D3DVSDE_DIFFUSE]);
833 TRACE_VSVECTOR(input->V[D3DVSDE_SPECULAR]);
834 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD0]);
835 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD1]);
838 TRACE_VSVECTOR(vshader->data->C[64]);
839 /* TODO: Run through all the tokens and find any labels, if, endifs, loops etc...., and make a labels list */
841 /* the first dword is the version tag */
844 if (shader_is_vshader_version(*pToken)) { /** version */
/* Main interpreter loop: runs until the end-of-shader token is reached. */
847 while (D3DVS_END() != *pToken) {
/* Comment tokens carry their own length; the payload is skipped. */
848 if (shader_is_comment(*pToken)) { /** comment */
849 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
851 pToken += comment_len;
855 opcode_token = *pToken++;
856 curOpcode = shader_get_opcode((IWineD3DBaseShader*) This, opcode_token);
/* Unknown opcode: report it and advance by the amount shader_skip_unrecognized returns. */
858 if (NULL == curOpcode) {
859 FIXME("Unrecognized opcode: token=%08lX\n", opcode_token);
860 pToken += shader_skip_unrecognized((IWineD3DBaseShader*) This, pToken);
864 if (curOpcode->num_params > 0) {
865 /* TRACE(">> execting opcode: pos=%d opcode_name=%s token=%08lX\n", pToken - vshader->function, curOpcode->name, *pToken); */
/* Resolve each parameter token to a register pointer, selected by register type and number. */
866 for (i = 0; i < curOpcode->num_params; ++i) {
867 DWORD reg = pToken[i] & D3DSP_REGNUM_MASK;
868 DWORD regtype = shader_get_regtype(pToken[i]);
872 /* TRACE("p[%d]=R[%d]\n", i, reg); */
876 /* TRACE("p[%d]=V[%s]\n", i, VertexShaderDeclRegister[reg]); */
877 p[i] = &input->V[reg];
/* Constant register: with relative addressing the index is offset by A[0].x. */
880 if (pToken[i] & D3DVS_ADDRMODE_RELATIVE) {
881 p[i] = &This->data->C[(DWORD) A[0].x + reg];
883 p[i] = &This->data->C[reg];
886 case D3DSPR_ADDR: /* case D3DSPR_TEXTURE: */
888 ERR("cannot handle address registers != a0, forcing use of a0\n");
891 /* TRACE("p[%d]=A[%d]\n", i, reg); */
896 case D3DSRO_POSITION:
897 p[i] = &output->oPos;
900 p[i] = &output->oFog;
902 case D3DSRO_POINT_SIZE:
903 p[i] = &output->oPts;
908 /* TRACE("p[%d]=oD[%d]\n", i, reg); */
909 p[i] = &output->oD[reg];
911 case D3DSPR_TEXCRDOUT:
912 /* TRACE("p[%d]=oT[%d]\n", i, reg); */
913 p[i] = &output->oT[reg];
915 /* TODO Decls and defs */
/* Parameter 0 is the destination; parameters 1 and up are sources. */
924 if (i > 0) { /* input reg */
925 DWORD swizzle = (pToken[i] & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
926 UINT isNegative = ((pToken[i] & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG);
928 if (!isNegative && (D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) {
929 /* TRACE("p[%d] not swizzled\n", i); */
/* Each swizzle selector is 2 bits wide and picks the source component for x, y, z and w. */
932 DWORD swizzle_x = swizzle & 0x03;
933 DWORD swizzle_y = (swizzle >> 2) & 0x03;
934 DWORD swizzle_z = (swizzle >> 4) & 0x03;
935 DWORD swizzle_w = (swizzle >> 6) & 0x03;
936 /* TRACE("p[%d] swizzled\n", i); */
/* The register is read as an array of floats so components can be picked by selector index. */
937 float* tt = (float*) p[i];
938 s[i].x = (isNegative) ? -tt[swizzle_x] : tt[swizzle_x];
939 s[i].y = (isNegative) ? -tt[swizzle_y] : tt[swizzle_y];
940 s[i].z = (isNegative) ? -tt[swizzle_z] : tt[swizzle_z];
941 s[i].w = (isNegative) ? -tt[swizzle_w] : tt[swizzle_w];
944 } else { /* output reg */
945 if ((pToken[i] & D3DSP_WRITEMASK_ALL) == D3DSP_WRITEMASK_ALL) {
948 p_send[i] = &d; /* to be post-processed for modifiers management */
/* Dispatch to the opcode handler with as many arguments as the opcode declares. */
954 switch (curOpcode->num_params) {
956 curOpcode->soft_fct();
959 curOpcode->soft_fct(p_send[0]);
962 curOpcode->soft_fct(p_send[0], p_send[1]);
965 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2]);
968 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3]);
971 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4]);
974 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4], p_send[5]);
977 ERR("%s too many params: %u\n", curOpcode->name, curOpcode->num_params);
980 /* check if output reg modifier post-process */
/* Partial write mask: copy only the enabled components from d into the real destination. */
981 if (curOpcode->num_params > 0 && (pToken[0] & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
982 if (pToken[0] & D3DSP_WRITEMASK_0) p[0]->x = d.x;
983 if (pToken[0] & D3DSP_WRITEMASK_1) p[0]->y = d.y;
984 if (pToken[0] & D3DSP_WRITEMASK_2) p[0]->z = d.z;
985 if (pToken[0] & D3DSP_WRITEMASK_3) p[0]->w = d.w;
988 TRACE_VSVECTOR(output->oPos);
989 TRACE_VSVECTOR(output->oD[0]);
990 TRACE_VSVECTOR(output->oD[1]);
991 TRACE_VSVECTOR(output->oT[0]);
992 TRACE_VSVECTOR(output->oT[1]);
993 TRACE_VSVECTOR(R[0]);
994 TRACE_VSVECTOR(R[1]);
995 TRACE_VSVECTOR(R[2]);
996 TRACE_VSVECTOR(R[3]);
997 TRACE_VSVECTOR(R[4]);
998 TRACE_VSVECTOR(R[5]);
1001 /* to next opcode token */
1002 pToken += curOpcode->num_params;
1005 TRACE("End of current instruction:\n");
1006 TRACE_VSVECTOR(output->oPos);
1007 TRACE_VSVECTOR(output->oD[0]);
1008 TRACE_VSVECTOR(output->oD[1]);
1009 TRACE_VSVECTOR(output->oT[0]);
1010 TRACE_VSVECTOR(output->oT[1]);
1011 TRACE_VSVECTOR(R[0]);
1012 TRACE_VSVECTOR(R[1]);
1013 TRACE_VSVECTOR(R[2]);
1014 TRACE_VSVECTOR(R[3]);
1015 TRACE_VSVECTOR(R[4]);
1016 TRACE_VSVECTOR(R[5]);
1019 #if 0 /* Must not be 1 in cvs */
1021 TRACE_VSVECTOR(output->oPos);
1022 TRACE_VSVECTOR(output->oD[0]);
1023 TRACE_VSVECTOR(output->oD[1]);
1024 TRACE_VSVECTOR(output->oT[0]);
1025 TRACE_VSVECTOR(output->oT[1]);
1030 /* *******************************************
1031 IWineD3DVertexShader IUnknown parts follow
1032 ******************************************* */
/* IUnknown::QueryInterface for the vertex shader object.
 * A reference is taken when riid is IID_IUnknown, IID_IWineD3DBase,
 * IID_IWineD3DBaseShader or IID_IWineD3DVertexShader; any other IID ends in
 * E_NOINTERFACE.
 *   iface - the vertex shader
 *   riid  - requested interface id
 *   ppobj - receives the interface pointer
 * NOTE(review): the store to *ppobj and the success return are not visible here - confirm. */
1033 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
1035 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1036 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
1037 if (IsEqualGUID(riid, &IID_IUnknown)
1038 || IsEqualGUID(riid, &IID_IWineD3DBase)
1039 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
1040 || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
1041 IUnknown_AddRef(iface);
1046 return E_NOINTERFACE;
/* IUnknown::AddRef: atomically increments This->ref and returns the result of
 * InterlockedIncrement. The trace prints the count as it was before the increment. */
1049 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
1050 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1051 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
1052 return InterlockedIncrement(&This->ref);
/* IUnknown::Release: atomically decrements This->ref and tears the object down.
 * The visible cleanup releases the vertex declaration (if any), deletes the GL
 * shader object when the shader is in GLSL mode with a non-zero prgId, frees
 * the three immediate-constant lists, and finally frees the object itself.
 * NOTE(review): the condition guarding the cleanup (presumably ref == 0) and
 * the return statement are not visible here - confirm.
 * NOTE(review): the copy of the token stream that SetFunction stores in
 * This->baseShader.function is not freed in the visible lines, and no
 * program deletion is visible for non-GLSL modes - check for leaks. */
1055 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
1056 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1058 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
1059 ref = InterlockedDecrement(&This->ref);
1061 if (This->vertexDeclaration) IWineD3DVertexDeclaration_Release(This->vertexDeclaration);
1062 if (This->baseShader.shader_mode == SHADER_GLSL && This->baseShader.prgId != 0) {
1063 /* If this shader is still attached to a program, GL will perform a lazy delete */
1064 TRACE("Deleting shader object %u\n", This->baseShader.prgId);
1065 GL_EXTCALL(glDeleteObjectARB(This->baseShader.prgId));
1066 checkGLcall("glDeleteObjectARB");
/* Free the float, boolean and integer immediate-constant lists. */
1068 shader_delete_constant_list(&This->baseShader.constantsF);
1069 shader_delete_constant_list(&This->baseShader.constantsB);
1070 shader_delete_constant_list(&This->baseShader.constantsI);
1071 HeapFree(GetProcessHeap(), 0, This);
1077 /* *******************************************
1078 IWineD3DVertexShader IWineD3DVertexShader parts follow
1079 ******************************************* */
/* Returns the parent object of this shader.
 * Stores This->parent in *parent and takes a reference on it, so the caller
 * owns one reference to the returned pointer.
 * NOTE(review): This->parent is passed to IUnknown_AddRef without a NULL check;
 * the return statement is not visible here. */
1081 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
1082 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1084 *parent = This->parent;
1085 IUnknown_AddRef(*parent);
1086 TRACE("(%p) : returning %p\n", This, *parent);
/* Returns the device this shader belongs to.
 * Takes a reference on This->wineD3DDevice and stores it in *pDevice, so the
 * caller owns one reference to the returned device.
 * NOTE(review): the return statement is not visible here. */
1090 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
1091 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1092 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
1093 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
1094 TRACE("(%p) returning %p\n", This, *pDevice);
/* Copies the stored shader token stream to the caller.
 *   impl        - the vertex shader
 *   pData       - destination buffer, or NULL to query the required size
 *   pSizeOfData - in: size of pData; out: This->baseShader.functionLength
 *                 when pData is NULL or the buffer is too small
 * A buffer smaller than functionLength yields WINED3DERR_MOREDATA.
 * NOTE(review): the other return statements are not visible here - confirm. */
1098 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
1099 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
1100 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
/* Size query: no buffer supplied, report the required size only. */
1102 if (NULL == pData) {
1103 *pSizeOfData = This->baseShader.functionLength;
/* Buffer too small: report the required size and fail. */
1106 if (*pSizeOfData < This->baseShader.functionLength) {
1107 *pSizeOfData = This->baseShader.functionLength;
1108 return WINED3DERR_MOREDATA;
1110 if (NULL == This->baseShader.function) { /* no function defined */
1111 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
/* Writes a NULL pointer value into the start of the caller's buffer. */
1112 (*(DWORD **) pData) = NULL;
1114 if(This->baseShader.functionLength == 0){
1117 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
1118 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
/* Installs a shader token stream on this vertex shader.
 *   iface     - the vertex shader
 *   pFunction - shader tokens supplied by the application, may be NULL
 * Steps, in order: trace the shader and set its limits, initialise the
 * immediate-constant lists, preload input semantics from an attached vertex
 * declaration, collect register usage into reg_maps, generate a hardware
 * shader unless the selected mode is SHADER_SW, and finally keep a private
 * copy of the tokens because the caller may release its buffer.
 * Returns the failing HRESULT of shader_get_registers_used, or E_OUTOFMEMORY
 * when the private copy cannot be allocated.
 * NOTE(review): the success return is not visible here - confirm. */
1123 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
1125 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
1127 shader_reg_maps reg_maps;
1129 /* First pass: trace shader */
1130 shader_trace_init((IWineD3DBaseShader*) This, pFunction);
1131 vshader_set_limits(This);
1133 /* Initialize immediate constant lists */
1134 list_init(&This->baseShader.constantsF);
1135 list_init(&This->baseShader.constantsB);
1136 list_init(&This->baseShader.constantsI);
1138 /* Preload semantics for d3d8 shaders */
1139 if (This->vertexDeclaration) {
1140 IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*) This->vertexDeclaration;
/* The last declaration element is skipped - presumably the end marker; confirm.
 * NOTE(review): if declarationWNumElements is unsigned and 0, the bound wraps - verify. */
1142 for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
1143 WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
1144 vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
1148 /* Second pass: figure out registers used, semantics, etc.. */
1149 memset(&reg_maps, 0, sizeof(shader_reg_maps));
1150 hr = shader_get_registers_used((IWineD3DBaseShader*) This, &reg_maps,
1151 This->semantics_in, This->semantics_out, pFunction);
1152 if (hr != WINED3D_OK) return hr;
1154 /* Generate HW shader if needed */
1155 This->baseShader.shader_mode = wined3d_settings.vs_selected_mode;
1156 if (NULL != pFunction && This->baseShader.shader_mode != SHADER_SW)
1157 IWineD3DVertexShaderImpl_GenerateShader(iface, &reg_maps, pFunction);
1159 /* copy the function ... because it will certainly be released by application */
/* functionLength is the byte count used for both the allocation and the copy;
 * presumably it was filled in by the trace pass above - confirm. */
1160 if (NULL != pFunction) {
1161 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1162 if (!This->baseShader.function) return E_OUTOFMEMORY;
1163 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1165 This->baseShader.function = NULL;
/* Method table for the IWineD3DVertexShader interface, wiring the
 * implementation functions above into the object. Entries are grouped by the
 * interface level that introduces them.
 * NOTE(review): the entry order presumably has to match the interface
 * declaration - confirm before reordering. */
1171 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
1173 /*** IUnknown methods ***/
1174 IWineD3DVertexShaderImpl_QueryInterface,
1175 IWineD3DVertexShaderImpl_AddRef,
1176 IWineD3DVertexShaderImpl_Release,
1177 /*** IWineD3DBase methods ***/
1178 IWineD3DVertexShaderImpl_GetParent,
1179 /*** IWineD3DBaseShader methods ***/
1180 IWineD3DVertexShaderImpl_SetFunction,
1181 /*** IWineD3DVertexShader methods ***/
1182 IWineD3DVertexShaderImpl_GetDevice,
1183 IWineD3DVertexShaderImpl_GetFunction