2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2004 Christian Costa
7 * Copyright 2005 Oliver Stieber
8 * Copyright 2006 Ivan Gyurdiev
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 #include "wined3d_private.h"
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
/* Expands to the gl_info member reached by casting This->baseShader.device to
 * IWineD3DDeviceImpl and that object's wineD3D member to IWineD3DImpl.
 * The expansion names `This` directly, so a variable called This must be in
 * scope wherever the macro is used. */
34 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->baseShader.device)->wineD3D))->gl_info
36 /* Shader debugging - Change the following line to enable debugging of software
/* Debug-trace helpers for the software shader functions in this file.
 * Under the `#if 0` guard, VSTRACE(A) forwards its parenthesised argument list
 * to TRACE, and TRACE_VSVECTOR(name) prints the x/y/z/w members of `name`.
 * The last definition makes TRACE_VSVECTOR expand to nothing.
 * NOTE(review): no #else/#endif and no empty VSTRACE definition appear in this
 * listing - confirm against the full file. */
38 #if 0 /* Must not be 1 in cvs version */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
43 # define TRACE_VSVECTOR(name)
47 * DirectX9 SDK download
48 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
51 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
53 * Using Vertex Shaders
54 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
57 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
60 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
66 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
69 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
71 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72 * http://developer.nvidia.com/view.asp?IO=vstovp
74 * NVIDIA: Memory Management with VAR
75 * http://developer.nvidia.com/view.asp?IO=var_memory_management
78 /* TODO: Vertex and pixel shaders are almost identical; the only exceptions are the way some of the data is looked up and the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders.
79 Because of this, the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross compiled using a large body of shared code. */
/* Sentinel value: the integer 1 cast to `const char *`. It is placed in the
 * third column of some opcode-table rows (defb, defi, breakp, setp, texldl)
 * instead of a name string or NULL. It is not a valid string and must not be
 * dereferenced.
 * NOTE(review): presumably marks opcodes that can only be emitted through the
 * GLSL backend - confirm against the code that reads the table. */
81 #define GLNAME_REQUIRE_GLSL ((const char *)1)
83 /*******************************
84 * vshader functions software VM
/* Software handler for the `add` opcode (name taken from the trace text).
 * d  - destination vector
 * s0 - first source vector
 * s1 - second source vector
 * NOTE(review): the arithmetic statements and the closing brace are not
 * present in this listing; presumably d = s0 + s1 component-wise - confirm
 * against the full file. Only the trace call is visible. */
87 static void vshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
92 VSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
93 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* dp3: three-component dot product.
 * Computes s0.x*s1.x + s0.y*s1.y + s0.z*s1.z and stores that scalar in all
 * four components of d. The w components of the sources are not used in the
 * computation; they appear only in the trace output. */
96 static void vshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
97 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
98 VSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
99 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* dp4: four-component dot product.
 * Computes s0.x*s1.x + s0.y*s1.y + s0.z*s1.z + s0.w*s1.w and stores that
 * scalar in all four components of d. */
102 static void vshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
103 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
104 VSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
105 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software handler for the `dst` opcode (name taken from the trace text).
 * The only store visible here is d->y = s0->y * s1->y.
 * NOTE(review): the stores to d->x, d->z and d->w and the closing brace are
 * not present in this listing - confirm against the full file. */
108 static void vshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
110 d->y = s0->y * s1->y;
113 VSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
114 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* expp: partial-precision base-2 exponential of s0->w.
 * Visible steps:
 *   d->x = 2 raised to floor(s0->w)
 *   d->y = s0->w minus floor(s0->w), i.e. the fractional part
 *   tmp.f = 2 raised to s0->w, after which the low 8 bits of the same
 *   storage are cleared through tmp.d (mask 0xFFFFFF00).
 * NOTE(review): the declaration of tmp (presumably a union of a float `f` and
 * a 32-bit integer `d`) and the stores to d->z and d->w are not present in
 * this listing - confirm against the full file.
 * The trace text says "exp" although this is the expp handler. */
117 static void vshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
123 tmp.f = floorf(s0->w);
124 d->x = powf(2.0f, tmp.f);
125 d->y = s0->w - tmp.f;
126 tmp.f = powf(2.0f, s0->w);
127 tmp.d &= 0xFFFFFF00U;
130 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
131 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* lit: lighting coefficient helper.
 * Visible steps:
 *   d->y = s0->x when s0->x is positive, otherwise 0
 *   d->z = s0->y raised to the power s0->w when both s0->x and s0->y are
 *          positive, otherwise 0
 * NOTE(review): the stores to d->x and d->w and the closing brace are not
 * present in this listing - confirm against the full file. */
134 static void vshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
136 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
137 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
139 VSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
140 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* logp: base-2 logarithm of |s0->w|, written to all four components of d.
 * The logarithm is computed as logf(x) / logf(2). When s0->w is zero the
 * result is -HUGE_VAL instead of calling logf(0). */
143 static void vshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
144 float tmp_f = fabsf(s0->w);
145 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
146 VSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
147 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* mad: component-wise multiply-add, d = s0 * s1 + s2.
 * Each of x, y, z and w is computed independently from the matching
 * components of the three sources. */
150 static void vshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
151 d->x = s0->x * s1->x + s2->x;
152 d->y = s0->y * s1->y + s2->y;
153 d->z = s0->z * s1->z + s2->z;
154 d->w = s0->w * s1->w + s2->w;
155 VSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
156 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
/* max: component-wise maximum of s0 and s1.
 * The comparison is `s0 >= s1`, so s0 is chosen on ties and s1 is chosen
 * whenever the comparison is false. */
159 static void vshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
160 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
161 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
162 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
163 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
164 VSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
165 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* min: component-wise minimum of s0 and s1.
 * The comparison is `s0 < s1`, so s1 is chosen on ties and whenever the
 * comparison is false. */
168 static void vshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
169 d->x = (s0->x < s1->x) ? s0->x : s1->x;
170 d->y = (s0->y < s1->y) ? s0->y : s1->y;
171 d->z = (s0->z < s1->z) ? s0->z : s1->z;
172 d->w = (s0->w < s1->w) ? s0->w : s1->w;
173 VSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
174 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software handler for the `mov` opcode (name taken from the trace text).
 * d  - destination vector
 * s0 - source vector
 * NOTE(review): the copy statements and the closing brace are not present in
 * this listing; presumably d receives s0 component-wise - confirm against the
 * full file. Only the trace call is visible. */
177 static void vshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
182 VSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
183 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* mul: component-wise product, d = s0 * s1 for each of x, y, z and w. */
186 static void vshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
187 d->x = s0->x * s1->x;
188 d->y = s0->y * s1->y;
189 d->z = s0->z * s1->z;
190 d->w = s0->w * s1->w;
191 VSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
192 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: takes no operands and changes no state; the only statement is the
 * debug trace. */
195 static void vshader_nop(void) {
196 /* NOPPPP ahhh too easy ;) */
197 VSTRACE(("executing nop\n"));
/* rcp: reciprocal of s0->w, written to all four components of d.
 * A zero input yields HUGE_VAL instead of performing the division. */
200 static void vshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
201 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
202 VSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
203 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* rsq: reciprocal square root of |s0->w|, written to all four components.
 * Special cases handled before calling sqrtf: an input of 0 yields HUGE_VAL,
 * and an input whose absolute value is exactly 1 yields 1 directly. */
206 static void vshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
207 float tmp_f = fabsf(s0->w);
208 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
209 VSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
210 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* sge: component-wise "set on greater or equal".
 * Each component of d becomes 1.0 when the matching component of s0 is
 * greater than or equal to that of s1, and 0.0 otherwise. */
213 static void vshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
214 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
215 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
216 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
217 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
218 VSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
219 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* slt: component-wise "set on less than".
 * Each component of d becomes 1.0 when the matching component of s0 is
 * strictly less than that of s1, and 0.0 otherwise. */
222 static void vshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
223 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
224 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
225 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
226 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
227 VSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
228 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* sub: component-wise difference, d = s0 - s1 for each of x, y, z and w. */
231 static void vshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
232 d->x = s0->x - s1->x;
233 d->y = s0->y - s1->y;
234 d->z = s0->z - s1->z;
235 d->w = s0->w - s1->w;
236 VSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
237 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
241 * Version 1.1 specific
/* exp: full-precision base-2 exponential.
 * Writes 2 raised to the power s0->w into all four components of d. */
244 static void vshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
245 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
246 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
247 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* log: base-2 logarithm of |s0->w|, written to all four components of d.
 * Computed as logf(x) / logf(2); a zero input yields -HUGE_VAL. The visible
 * arithmetic is the same expression used by vshader_logp. */
250 static void vshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
251 float tmp_f = fabsf(s0->w);
252 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
253 VSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
254 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* frc: fractional part.
 * The visible stores set d->x and d->y to the source component minus its
 * floor, which is non-negative for negative inputs as well.
 * NOTE(review): the stores to d->z and d->w and the closing brace are not
 * present in this listing - confirm against the full file. */
257 static void vshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
258 d->x = s0->x - floorf(s0->x);
259 d->y = s0->y - floorf(s0->y);
262 VSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
263 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Fixed-size FLOAT matrix types, named D3DMATRIX<rows><columns> and indexed
 * as mat[row][column]. They are the matrix parameter types of the vshader_m*
 * helpers in this file. Note that vshader_m4x3 takes a D3DMATRIX34 (three
 * rows of four) and vshader_m3x4 takes a D3DMATRIX43 (four rows of three). */
266 typedef FLOAT D3DMATRIX44[4][4];
267 typedef FLOAT D3DMATRIX43[4][3];
268 typedef FLOAT D3DMATRIX34[3][4];
269 typedef FLOAT D3DMATRIX33[3][3];
270 typedef FLOAT D3DMATRIX23[2][3];
/* m4x4: multiplies the 4x4 matrix `mat` by the vector s0.
 * In the mat[row][column] form, each component of d is the four-term dot
 * product of one matrix row with s0: row 0 gives d->x, row 1 gives d->y,
 * row 2 gives d->z and row 3 gives d->w.
 * NOTE(review): the lines that use mat1..mat4 (marked "Buggy CODE") appear to
 * be the remains of a commented-out alternative whose comment delimiters are
 * not present in this listing; mat1 is not a parameter of this function.
 * Confirm against the full file. */
272 static void vshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
274 * Buggy CODE: here only if cast not work for copy/paste
275 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
276 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
277 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
278 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
279 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
280 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
281 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
283 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
284 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
285 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
286 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
287 VSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
288 VSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
289 VSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
290 VSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
/* m4x3: multiplies a matrix of three rows by four columns by the vector s0.
 * d->x, d->y and d->z are the four-term dot products of rows 0, 1 and 2 of
 * `mat` with s0.
 * NOTE(review): no store to d->w is present in this listing, although the
 * last trace line prints d->w - confirm against the full file. */
293 static void vshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
294 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
295 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
296 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
298 VSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
299 VSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
300 VSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
301 VSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
/* m3x4: multiplies a matrix of four rows by three columns by the x, y and z
 * components of s0. Each of d->x, d->y, d->z and d->w is the three-term dot
 * product of rows 0 to 3 of `mat` with s0; s0->w is read only by the trace. */
304 static void vshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
305 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
306 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
307 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
308 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
309 VSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
310 VSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
311 VSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
312 VSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
/* m3x3: multiplies a 3x3 matrix by the x, y and z components of s0.
 * d->x, d->y and d->z are the three-term dot products of rows 0, 1 and 2 of
 * `mat` with s0.
 * NOTE(review): no store to d->w is present in this listing, although the
 * last trace line prints d->w - confirm against the full file. */
315 static void vshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
316 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
317 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
318 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
320 VSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
321 VSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
322 VSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
323 VSTRACE(("executing m3x3(4): (%f) \n", d->w));
/* m3x2: multiplies a matrix of two rows by three columns by the x, y and z
 * components of s0. d->x and d->y are the three-term dot products of rows 0
 * and 1 of `mat` with s0.
 * NOTE(review): lines are missing from this listing both before and after
 * the two visible stores, so any handling of d->z and d->w cannot be seen
 * here - confirm against the full file. */
326 static void vshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
328 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
329 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
335 * Version 2.0 specific
/* lrp: component-wise linear interpolation, d = s0 * (s1 - s2) + s2.
 * With s0 equal to 0 the result is s2 and with s0 equal to 1 it is s1, so s0
 * acts as the blend factor between the other two sources. */
337 static void vshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
338 d->x = s0->x * (s1->x - s2->x) + s2->x;
339 d->y = s0->y * (s1->y - s2->y) + s2->y;
340 d->z = s0->z * (s1->z - s2->z) + s2->z;
341 d->w = s0->w * (s1->w - s2->w) + s2->w;
/* crs: three-component cross product of s0 and s1, stored in d->x, d->y and
 * d->z. d->w is set to the constant 0.9f; the original comment on that line
 * explains that w is undefined for this instruction. */
344 static void vshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
345 d->x = s0->y * s1->z - s0->z * s1->y;
346 d->y = s0->z * s1->x - s0->x * s1->z;
347 d->z = s0->x * s1->y - s0->y * s1->x;
348 d->w = 0.9f; /* w is undefined, so set it to something safeish */
350 VSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
351 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software handler for the `abs` opcode (name taken from the trace text).
 * d  - destination vector
 * s0 - source vector
 * NOTE(review): the arithmetic statements and the closing brace are not
 * present in this listing; presumably d receives the absolute value of each
 * component of s0 - confirm against the full file. */
354 static void vshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
360 VSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
361 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
366 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
/* Signatures of the remaining software opcode handlers. Each name below is
 * referenced from the opcode table IWineD3DVertexShaderImpl_shader_ins.
 * NOTE(review): only the opening line of each function is present in this
 * listing; the bodies and closing braces are missing, so nothing can be said
 * here about what the handlers do - confirm against the full file. */
367 static void vshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
371 static void vshader_call(WINED3DSHADERVECTOR* d) {
375 static void vshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
379 static void vshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
383 static void vshader_ret(void) {
387 static void vshader_endloop(void) {
391 static void vshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
395 static void vshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
399 static void vshader_sgn(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
403 static void vshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
407 static void vshader_sincos3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
411 static void vshader_sincos2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
415 static void vshader_rep(WINED3DSHADERVECTOR* d) {
419 static void vshader_endrep(void) {
423 static void vshader_if(WINED3DSHADERVECTOR* d) {
427 static void vshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
431 static void vshader_else(void) {
435 static void vshader_label(WINED3DSHADERVECTOR* d) {
439 static void vshader_endif(void) {
443 static void vshader_break(void) {
447 static void vshader_breakc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
451 static void vshader_breakp(WINED3DSHADERVECTOR* d) {
455 static void vshader_mova(WINED3DSHADERVECTOR* d) {
459 static void vshader_defb(WINED3DSHADERVECTOR* d) {
463 static void vshader_defi(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
467 static void vshader_setp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
471 static void vshader_texldl(WINED3DSHADERVECTOR* d) {
/* Opcode table for vertex shaders.
 * Each row starts with a WINED3DSIO_ opcode constant and its lower-case
 * mnemonic. The third column is an upper-case name string, NULL, or the
 * GLNAME_REQUIRE_GLSL sentinel. Two small integers follow, then the software
 * handler from this file, then two further callbacks (names starting with
 * vshader_hw_ and shader_glsl_ respectively, or NULL), and finally two
 * version values written as 0, -1 or WINED3DVS_VERSION(major, minor).
 * The table ends with a row whose opcode is 0 and whose name is NULL.
 * SINCOS appears twice with different operand counts and version values.
 * NOTE(review): the exact meaning of each column is defined by SHADER_OPCODE,
 * which is not visible here - presumably the integers are a destination flag
 * and a parameter count, and the version pair is a minimum and maximum.
 * NOTE(review): the mnemonic for WINED3DSIO_TEXLDL is spelled "texdl"; this
 * looks like a typo for "texldl" - confirm before changing, since the string
 * is emitted at runtime. */
475 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
476 /* This table is not order or position dependent. */
479 {WINED3DSIO_NOP, "nop", "NOP", 0, 0, vshader_nop, vshader_hw_map2gl, NULL, 0, 0},
480 {WINED3DSIO_MOV, "mov", "MOV", 1, 2, vshader_mov, vshader_hw_map2gl, shader_glsl_mov, 0, 0},
481 {WINED3DSIO_MOVA, "mova", NULL, 1, 2, vshader_mova, vshader_hw_map2gl, shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
482 {WINED3DSIO_ADD, "add", "ADD", 1, 3, vshader_add, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
483 {WINED3DSIO_SUB, "sub", "SUB", 1, 3, vshader_sub, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
484 {WINED3DSIO_MAD, "mad", "MAD", 1, 4, vshader_mad, vshader_hw_map2gl, shader_glsl_mad, 0, 0},
485 {WINED3DSIO_MUL, "mul", "MUL", 1, 3, vshader_mul, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
486 {WINED3DSIO_RCP, "rcp", "RCP", 1, 2, vshader_rcp, vshader_hw_rsq_rcp,shader_glsl_rcp, 0, 0},
487 {WINED3DSIO_RSQ, "rsq", "RSQ", 1, 2, vshader_rsq, vshader_hw_rsq_rcp,shader_glsl_map2gl, 0, 0},
488 {WINED3DSIO_DP3, "dp3", "DP3", 1, 3, vshader_dp3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
489 {WINED3DSIO_DP4, "dp4", "DP4", 1, 3, vshader_dp4, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
490 {WINED3DSIO_MIN, "min", "MIN", 1, 3, vshader_min, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
491 {WINED3DSIO_MAX, "max", "MAX", 1, 3, vshader_max, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
492 {WINED3DSIO_SLT, "slt", "SLT", 1, 3, vshader_slt, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
493 {WINED3DSIO_SGE, "sge", "SGE", 1, 3, vshader_sge, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
494 {WINED3DSIO_ABS, "abs", "ABS", 1, 2, vshader_abs, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
495 {WINED3DSIO_EXP, "exp", "EX2", 1, 2, vshader_exp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
496 {WINED3DSIO_LOG, "log", "LG2", 1, 2, vshader_log, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
497 {WINED3DSIO_EXPP, "expp", "EXP", 1, 2, vshader_expp, vshader_hw_map2gl, shader_glsl_expp, 0, 0},
498 {WINED3DSIO_LOGP, "logp", "LOG", 1, 2, vshader_logp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
499 {WINED3DSIO_LIT, "lit", "LIT", 1, 2, vshader_lit, vshader_hw_map2gl, shader_glsl_lit, 0, 0},
500 {WINED3DSIO_DST, "dst", "DST", 1, 3, vshader_dst, vshader_hw_map2gl, shader_glsl_dst, 0, 0},
501 {WINED3DSIO_LRP, "lrp", "LRP", 1, 4, vshader_lrp, NULL, shader_glsl_lrp, 0, 0},
502 {WINED3DSIO_FRC, "frc", "FRC", 1, 2, vshader_frc, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
503 {WINED3DSIO_POW, "pow", "POW", 1, 3, vshader_pow, NULL, shader_glsl_map2gl, 0, 0},
504 {WINED3DSIO_CRS, "crs", "XPS", 1, 3, vshader_crs, NULL, shader_glsl_cross, 0, 0},
505 /* TODO: sng can possibly be performed a s
508 {WINED3DSIO_SGN, "sgn", NULL, 1, 2, vshader_sgn, NULL, shader_glsl_map2gl, 0, 0},
509 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
512 MUL vec.xyz, vec, tmp;
513 but I think this is better because it accounts for w properly.
519 {WINED3DSIO_NRM, "nrm", NULL, 1, 2, vshader_nrm, NULL, shader_glsl_map2gl, 0, 0},
520 {WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, vshader_sincos2, NULL, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
521 {WINED3DSIO_SINCOS, "sincos", NULL, 1, 2, vshader_sincos3, NULL, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
524 {WINED3DSIO_M4x4, "m4x4", "undefined", 1, 3, vshader_m4x4, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
525 {WINED3DSIO_M4x3, "m4x3", "undefined", 1, 3, vshader_m4x3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
526 {WINED3DSIO_M3x4, "m3x4", "undefined", 1, 3, vshader_m3x4, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
527 {WINED3DSIO_M3x3, "m3x3", "undefined", 1, 3, vshader_m3x3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
528 {WINED3DSIO_M3x2, "m3x2", "undefined", 1, 3, vshader_m3x2, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
530 /* Declare registers */
531 {WINED3DSIO_DCL, "dcl", NULL, 0, 2, vshader_dcl, NULL, NULL, 0, 0},
533 /* Constant definitions */
534 {WINED3DSIO_DEF, "def", NULL, 1, 5, vshader_def, NULL, NULL, 0, 0},
535 {WINED3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 1, 2, vshader_defb, NULL, NULL, 0, 0},
536 {WINED3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 1, 5, vshader_defi, NULL, NULL, 0, 0},
538 /* Flow control - requires GLSL or software shaders */
539 {WINED3DSIO_REP , "rep", NULL, 0, 1, vshader_rep, NULL, shader_glsl_rep, WINED3DVS_VERSION(2,0), -1},
540 {WINED3DSIO_ENDREP, "endrep", NULL, 0, 0, vshader_endrep, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
541 {WINED3DSIO_IF, "if", NULL, 0, 1, vshader_if, NULL, shader_glsl_if, WINED3DVS_VERSION(2,0), -1},
542 {WINED3DSIO_IFC, "ifc", NULL, 0, 2, vshader_ifc, NULL, shader_glsl_ifc, WINED3DVS_VERSION(2,1), -1},
543 {WINED3DSIO_ELSE, "else", NULL, 0, 0, vshader_else, NULL, shader_glsl_else, WINED3DVS_VERSION(2,0), -1},
544 {WINED3DSIO_ENDIF, "endif", NULL, 0, 0, vshader_endif, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
545 {WINED3DSIO_BREAK, "break", NULL, 0, 0, vshader_break, NULL, shader_glsl_break, WINED3DVS_VERSION(2,1), -1},
546 {WINED3DSIO_BREAKC, "breakc", NULL, 0, 2, vshader_breakc, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
547 {WINED3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 0, 1, vshader_breakp, NULL, NULL, 0, 0},
548 {WINED3DSIO_CALL, "call", NULL, 0, 1, vshader_call, NULL, shader_glsl_call, WINED3DVS_VERSION(2,0), -1},
549 {WINED3DSIO_CALLNZ, "callnz", NULL, 0, 2, vshader_callnz, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
550 {WINED3DSIO_LOOP, "loop", NULL, 0, 2, vshader_loop, NULL, shader_glsl_loop, WINED3DVS_VERSION(2,0), -1},
551 {WINED3DSIO_RET, "ret", NULL, 0, 0, vshader_ret, NULL, NULL, WINED3DVS_VERSION(2,0), -1},
552 {WINED3DSIO_ENDLOOP, "endloop", NULL, 0, 0, vshader_endloop, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
553 {WINED3DSIO_LABEL, "label", NULL, 0, 1, vshader_label, NULL, shader_glsl_label, WINED3DVS_VERSION(2,0), -1},
555 {WINED3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 1, 3, vshader_setp, NULL, NULL, 0, 0},
556 {WINED3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 1, 3, vshader_texldl, NULL, NULL, 0, 0},
557 {0, NULL, NULL, 0, 0, NULL, NULL, 0, 0}
/* Fills This->baseShader.limits according to the shader version.
 * Set for every version: texcoord = 0, attributes = 16, packed_input = 0,
 * and constant_float taken from GL_LIMITS(vshader_constantsF).
 * Set per value of This->baseShader.hex_version:
 *   1.0 / 1.1 : 12 temporaries, no bool or int constants, no labels
 *   2.0 / 2.1 : 12 temporaries, 16 bool and 16 int constants, 16 labels
 *   3.0       : 32 temporaries, 32 bool and 32 int constants,
 *               12 packed outputs, 4 samplers, 16 labels (a FIXME on that
 *               line says the value should be 2048)
 *   other     : same values as 2.0 / 2.1, plus a FIXME log of the version
 * The address limit is 1 in every case.
 * NOTE(review): no `break` statement is visible between the cases in this
 * listing. If they are also absent from the real file, every version falls
 * through to the default values - confirm against the full file. */
560 static void vshader_set_limits(
561 IWineD3DVertexShaderImpl *This) {
563 This->baseShader.limits.texcoord = 0;
564 This->baseShader.limits.attributes = 16;
565 This->baseShader.limits.packed_input = 0;
567 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
568 This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
570 switch (This->baseShader.hex_version) {
571 case WINED3DVS_VERSION(1,0):
572 case WINED3DVS_VERSION(1,1):
573 This->baseShader.limits.temporary = 12;
574 This->baseShader.limits.constant_bool = 0;
575 This->baseShader.limits.constant_int = 0;
576 This->baseShader.limits.address = 1;
577 This->baseShader.limits.packed_output = 0;
578 This->baseShader.limits.sampler = 0;
579 This->baseShader.limits.label = 0;
582 case WINED3DVS_VERSION(2,0):
583 case WINED3DVS_VERSION(2,1):
584 This->baseShader.limits.temporary = 12;
585 This->baseShader.limits.constant_bool = 16;
586 This->baseShader.limits.constant_int = 16;
587 This->baseShader.limits.address = 1;
588 This->baseShader.limits.packed_output = 0;
589 This->baseShader.limits.sampler = 0;
590 This->baseShader.limits.label = 16;
593 case WINED3DVS_VERSION(3,0):
594 This->baseShader.limits.temporary = 32;
595 This->baseShader.limits.constant_bool = 32;
596 This->baseShader.limits.constant_int = 32;
597 This->baseShader.limits.address = 1;
598 This->baseShader.limits.packed_output = 12;
599 This->baseShader.limits.sampler = 4;
600 This->baseShader.limits.label = 16; /* FIXME: 2048 */
603 default: This->baseShader.limits.temporary = 12;
604 This->baseShader.limits.constant_bool = 16;
605 This->baseShader.limits.constant_int = 16;
606 This->baseShader.limits.address = 1;
607 This->baseShader.limits.packed_output = 0;
608 This->baseShader.limits.sampler = 0;
609 This->baseShader.limits.label = 16;
610 FIXME("Unrecognized vertex shader version %#x\n",
611 This->baseShader.hex_version);
615 /* This is an internal function,
616 * used to create fake semantics for shaders
617 * that don't have them - d3d8 shaders where the declaration
618 * stores the register for each input
/* Records a synthetic input semantic for register `regnum`.
 * Builds a usage token from bit 31, `usage` and `usage_idx` (each shifted by
 * its WINED3DSP_DCL_ shift constant), and a register token from bit 31, the
 * all-components write mask, the input register type and `regnum`. Both are
 * stored in This->semantics_in[regnum].
 * NOTE(review): the declaration of the `regnum` parameter is not present in
 * this listing, and `regnum` is used as an array index without a bounds check
 * in the visible code - confirm the caller guarantees the range.
 * NOTE(review): `0x1 << 31` shifts a signed int into its sign bit, which is
 * undefined behaviour in C when int is 32 bits wide; an unsigned constant
 * such as `0x1u << 31` would avoid that. */
620 static void vshader_set_input(
621 IWineD3DVertexShaderImpl* This,
623 BYTE usage, BYTE usage_idx) {
625 /* Fake usage: set reserved bit, usage, usage_idx */
626 DWORD usage_token = (0x1 << 31) |
627 (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
629 /* Fake register; set reserved bit, regnum, type: input, wmask: all */
630 DWORD reg_token = (0x1 << 31) |
631 WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
633 This->semantics_in[regnum].usage = usage_token;
634 This->semantics_in[regnum].reg = reg_token;
/* Decides whether two (usage, usage index) pairs refer to the same semantic.
 * Returns FALSE when the indices differ. Returns TRUE when the usages are
 * equal, or when one is WINED3DDECLUSAGE_POSITION and the other is
 * WINED3DDECLUSAGE_POSITIONT (the two are treated as interchangeable).
 * NOTE(review): the final return for the non-matching case and the closing
 * brace are not present in this listing - presumably FALSE; confirm against
 * the full file. */
637 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
638 if (usage_idx1 != usage_idx2) return FALSE;
639 if (usage1 == usage2) return TRUE;
640 if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
641 if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
/* Searches the shader's input semantics for a requested usage.
 * iface         - vertex shader, cast to IWineD3DVertexShaderImpl
 * usage_req     - usage to look for
 * usage_idx_req - usage index to look for
 * regnum        - output pointer for the matching register number
 * Walks This->semantics_in[0 .. MAX_ATTRIBS-1], decodes the usage and usage
 * index from each stored usage token with the WINED3DSP_DCL_ mask and shift
 * constants, skips entries whose token is zero, and tests the rest with
 * match_usage().
 * NOTE(review): the declaration of `i`, the action taken on a match and the
 * final return are not present in this listing - presumably the index is
 * written to *regnum and TRUE is returned, with FALSE after the loop; confirm
 * against the full file. */
646 BOOL vshader_get_input(
647 IWineD3DVertexShader* iface,
648 BYTE usage_req, BYTE usage_idx_req,
649 unsigned int* regnum) {
651 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
654 for (i = 0; i < MAX_ATTRIBS; i++) {
655 DWORD usage_token = This->semantics_in[i].usage;
656 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
657 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
659 if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
/* Reports whether input register `regnum` is fed by a D3DCOLOR element.
 * Decodes the usage and usage index stored in This->semantics_in[regnum],
 * then, if the device's state block has a vertex declaration, scans its
 * elements for one that match_usage() accepts and returns whether that
 * element's Type is WINED3DDECLTYPE_D3DCOLOR.
 * The loop stops one short of declarationWNumElements, so the last element
 * is never examined.
 * NOTE(review): presumably the last element is an end marker - confirm. If
 * declarationWNumElements is unsigned and can be 0, the `- 1` would wrap.
 * When nothing matches, an ERR message is logged.
 * NOTE(review): the declaration of `i` and the return that follows the ERR
 * call are not present in this listing - confirm against the full file. */
667 BOOL vshader_input_is_color(
668 IWineD3DVertexShader* iface,
669 unsigned int regnum) {
671 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
672 IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
673 IWineD3DVertexDeclarationImpl *vertexDeclaration = (IWineD3DVertexDeclarationImpl *)deviceImpl->stateBlock->vertexDecl;
675 DWORD usage_token = This->semantics_in[regnum].usage;
676 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
677 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
679 if (vertexDeclaration) {
681 /* Find the declaration element that matches our register, then check
682 * if it has D3DCOLOR as its type. This works for both d3d8 and d3d9. */
683 for (i = 0; i < vertexDeclaration->declarationWNumElements-1; ++i) {
684 WINED3DVERTEXELEMENT *element = vertexDeclaration->pDeclarationWine + i;
685 if (match_usage(element->Usage, element->UsageIndex, usage, usage_idx)) {
686 return element->Type == WINED3DDECLTYPE_D3DCOLOR;
691 ERR("Either no vertexdeclaration present, or register not matched. This should never happen.\n");
695 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
696 or GLSL and send it to the card */
697 static VOID IWineD3DVertexShaderImpl_GenerateShader(
698 IWineD3DVertexShader *iface,
699 shader_reg_maps* reg_maps,
700 CONST DWORD *pFunction) {
702 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
703 SHADER_BUFFER buffer;
705 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
706 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
707 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
708 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
709 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
710 This->fixupVertexBufferSize = PGMSIZE;
711 This->fixupVertexBuffer[0] = 0;
713 buffer.buffer = This->device->fixupVertexBuffer;
715 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
719 buffer.newline = TRUE;
721 if (This->baseShader.shader_mode == SHADER_GLSL) {
723 /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
724 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
726 /* Base Declarations */
727 shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
729 /* Base Shader Body */
730 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
732 /* Unpack 3.0 outputs */
733 if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0))
734 vshader_glsl_output_unpack(&buffer, This->semantics_out);
736 /* Clamp the fog from 0 to 1 if it's used */
739 shader_addline(&buffer, "gl_FogFragCoord = clamp(gl_FogFragCoord, 0.0, 1.0);\n");
742 /* Write the final position.
744 * OpenGL coordinates specify the center of the pixel while d3d coords specify
745 * the corner. The offsets are stored in z and w in the 2nd row of the projection
746 * matrix to avoid wasting a free shader constant. Add them to the w and z coord
749 shader_addline(&buffer, "gl_Position.x = gl_Position.x + posFixup[2];\n");
750 shader_addline(&buffer, "gl_Position.y = gl_Position.y + posFixup[3];\n");
751 /* Account for any inverted textures (render to texture case) by reversing the y coordinate
752 * (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices)
754 shader_addline(&buffer, "gl_Position.y = gl_Position.y * posFixup[1];\n");
756 shader_addline(&buffer, "}\n");
758 TRACE("Compiling shader object %u\n", shader_obj);
759 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
760 GL_EXTCALL(glCompileShaderARB(shader_obj));
761 print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
763 /* Store the shader object */
764 This->baseShader.prgId = shader_obj;
766 } else if (This->baseShader.shader_mode == SHADER_ARB) {
768 /* Create the hw ARB shader */
769 shader_addline(&buffer, "!!ARBvp1.0\n");
771 /* Mesa supports only 95 constants */
772 if (GL_VEND(MESA) || GL_VEND(WINE))
773 This->baseShader.limits.constant_float =
774 min(95, This->baseShader.limits.constant_float);
776 /* Base Declarations */
777 shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
779 /* We need a constant to fixup the final position */
780 shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
784 shader_addline(&buffer, "TEMP TMP_FOG;\n");
787 /* Base Shader Body */
788 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
790 /* Make sure the fog value is positive - values above 1.0 are ignored */
792 shader_addline(&buffer, "MAX result.fogcoord, TMP_FOG, 0.0;\n");
794 /* Write the final position.
796 * OpenGL coordinates specify the center of the pixel while d3d coords specify
797 * the corner. The offsets are stored in the 2nd row of the projection matrix,
798 * the x offset in z and the y offset in w. Add them to the resulting position
800 shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, posFixup.z;\n");
801 shader_addline(&buffer, "ADD TMP_OUT.y, TMP_OUT.y, posFixup.w;\n");
802 /* Account for any inverted textures (render to texture case) by reversing the y coordinate
803 * (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices)
805 shader_addline(&buffer, "MUL TMP_OUT.y, TMP_OUT.y, posFixup.y;\n");
807 shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
809 shader_addline(&buffer, "END\n");
811 /* TODO: change to resource.glObjectHandle or something like that */
812 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
814 TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
815 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
817 TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
818 /* Create the program and check for errors */
819 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
820 buffer.bsize, buffer.buffer));
822 if (glGetError() == GL_INVALID_OPERATION) {
824 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
825 FIXME("HW VertexShader Error at position %d: %s\n",
826 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
827 This->baseShader.prgId = -1;
831 #if 1 /* if were using the data buffer of device then we don't need to free it */
832 HeapFree(GetProcessHeap(), 0, buffer.buffer);
/* Hardware (HAL) execution entry point for a vertex shader.
 *
 * iface:  the vertex shader.
 * input:  per-vertex input data.
 * output: per-vertex output data.
 *
 * The only part of the body visible here is a TODO about using the
 * NV_vertex_program / ARB_vertex_program extensions.
 * NOTE(review): no use of input/output and no return statement are visible in
 * this excerpt - confirm what the function returns against the full file.
 */
836 BOOL IWineD3DVertexShaderImpl_ExecuteHAL(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
838 * TODO: use the NV_vertex_program (or 1_1) extension
839 * and specifics vendors (ARB_vertex_program??) variants for it
/* Software interpreter for a vertex shader.
 *
 * Walks the token stream in This->baseShader.function until WINED3DVS_END().
 * Comment tokens are skipped using their encoded length. Every other opcode
 * token is looked up with shader_get_opcode(); its parameter tokens are
 * resolved to pointers into the temporary (R), address (A), constant
 * (This->data->C), input (input->V) or output (output->oPos / oFog / oPts /
 * oD / oT) registers, and the opcode's soft_fct is called with those pointers.
 *
 * Source parameters that carry a swizzle or the NEG modifier are first copied
 * into s[]. A destination with a partial write mask is redirected to the
 * scratch vector d, and only the selected components are copied back after
 * the opcode has run.
 *
 * iface:  the vertex shader to run.
 * input:  per-vertex input registers, read through input->V.
 * output: output registers written by the shader.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
844 HRESULT WINAPI IWineD3DVertexShaderImpl_ExecuteSW(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
845 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
848 /** Vertex Shader Temporary Registers */
849 WINED3DSHADERVECTOR R[12];
850 /*D3DSHADERSCALAR A0;*/
851 WINED3DSHADERVECTOR A[1];
852 /** temporary Vector for modifier management */
853 WINED3DSHADERVECTOR d;
/* s[] is indexed with the raw parameter index i (sources are i = 1..5), so it
 * needs one slot per parameter, like p[] and p_send[]. With only 3 slots an
 * opcode with three sources (i == 3) wrote past the end of the array. */
854 WINED3DSHADERVECTOR s[6];
856 const DWORD* pToken = This->baseShader.function;
857 const SHADER_OPCODE* curOpcode = NULL;
858 /** functions parameters */
859 WINED3DSHADERVECTOR* p[6];
860 WINED3DSHADERVECTOR* p_send[6];
863 /** init temporary register */
864 memset(R, 0, 12 * sizeof(WINED3DSHADERVECTOR));
866 /* vshader_program_parse(vshader); */
867 #if 0 /* Must not be 1 in cvs */
869 TRACE_VSVECTOR(This->data->C[0]);
870 TRACE_VSVECTOR(This->data->C[1]);
871 TRACE_VSVECTOR(This->data->C[2]);
872 TRACE_VSVECTOR(This->data->C[3]);
873 TRACE_VSVECTOR(This->data->C[4]);
874 TRACE_VSVECTOR(This->data->C[5]);
875 TRACE_VSVECTOR(This->data->C[6]);
876 TRACE_VSVECTOR(This->data->C[7]);
877 TRACE_VSVECTOR(This->data->C[8]);
878 TRACE_VSVECTOR(This->data->C[64]);
879 TRACE_VSVECTOR(input->V[D3DVSDE_POSITION]);
880 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDWEIGHT]);
881 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDINDICES]);
882 TRACE_VSVECTOR(input->V[D3DVSDE_NORMAL]);
883 TRACE_VSVECTOR(input->V[D3DVSDE_PSIZE]);
884 TRACE_VSVECTOR(input->V[D3DVSDE_DIFFUSE]);
885 TRACE_VSVECTOR(input->V[D3DVSDE_SPECULAR]);
886 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD0]);
887 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD1]);
890 TRACE_VSVECTOR(vshader->data->C[64]);
891 /* TODO: Run through all the tokens and find and labels, if, endifs, loops etc...., and make a labels list */
893 /* the first dword is the version tag */
896 if (shader_is_vshader_version(*pToken)) { /** version */
/* Main interpreter loop: one iteration per comment or instruction token. */
899 while (WINED3DVS_END() != *pToken) {
900 if (shader_is_comment(*pToken)) { /** comment */
901 DWORD comment_len = (*pToken & WINED3DSI_COMMENTSIZE_MASK) >> WINED3DSI_COMMENTSIZE_SHIFT;
903 pToken += comment_len;
907 opcode_token = *pToken++;
908 curOpcode = shader_get_opcode((IWineD3DBaseShader*) This, opcode_token);
910 if (NULL == curOpcode) {
911 FIXME("Unrecognized opcode: token=%08x\n", opcode_token);
912 pToken += shader_skip_unrecognized((IWineD3DBaseShader*) This, pToken);
916 if (curOpcode->num_params > 0) {
917 /* TRACE(">> execting opcode: pos=%d opcode_name=%s token=%08lX\n", pToken - vshader->function, curOpcode->name, *pToken); */
/* Resolve each parameter token to the register it names. */
918 for (i = 0; i < curOpcode->num_params; ++i) {
919 DWORD reg = pToken[i] & WINED3DSP_REGNUM_MASK;
920 DWORD regtype = shader_get_regtype(pToken[i]);
923 case WINED3DSPR_TEMP:
924 /* TRACE("p[%d]=R[%d]\n", i, reg); */
927 case WINED3DSPR_INPUT:
928 /* TRACE("p[%d]=V[%s]\n", i, VertexShaderDeclRegister[reg]); */
929 p[i] = &input->V[reg];
931 case WINED3DSPR_CONST:
/* Relative addressing offsets the constant index by the x component of a0. */
932 if (pToken[i] & WINED3DSHADER_ADDRMODE_RELATIVE) {
933 p[i] = &This->data->C[(DWORD) A[0].x + reg];
935 p[i] = &This->data->C[reg];
938 case WINED3DSPR_ADDR: /* case WINED3DSPR_TEXTURE: */
940 ERR("cannot handle address registers != a0, forcing use of a0\n");
943 /* TRACE("p[%d]=A[%d]\n", i, reg); */
946 case WINED3DSPR_RASTOUT:
948 case WINED3DSRO_POSITION:
949 p[i] = &output->oPos;
952 p[i] = &output->oFog;
954 case WINED3DSRO_POINT_SIZE:
955 p[i] = &output->oPts;
959 case WINED3DSPR_ATTROUT:
960 /* TRACE("p[%d]=oD[%d]\n", i, reg); */
961 p[i] = &output->oD[reg];
963 case WINED3DSPR_TEXCRDOUT:
964 /* TRACE("p[%d]=oT[%d]\n", i, reg); */
965 p[i] = &output->oT[reg];
967 /* TODO Decls and defs */
/* Parameter 0 is the destination; every later parameter is a source. */
976 if (i > 0) { /* input reg */
977 DWORD swizzle = (pToken[i] & WINED3DVS_SWIZZLE_MASK) >> WINED3DVS_SWIZZLE_SHIFT;
978 UINT isNegative = ((pToken[i] & WINED3DSP_SRCMOD_MASK) == WINED3DSPSM_NEG);
980 if (!isNegative && (WINED3DVS_NOSWIZZLE >> WINED3DVS_SWIZZLE_SHIFT) == swizzle) {
981 /* TRACE("p[%d] not swizzled\n", i); */
/* Each swizzle field is 2 bits wide and selects the source component (0..3). */
984 DWORD swizzle_x = swizzle & 0x03;
985 DWORD swizzle_y = (swizzle >> 2) & 0x03;
986 DWORD swizzle_z = (swizzle >> 4) & 0x03;
987 DWORD swizzle_w = (swizzle >> 6) & 0x03;
988 /* TRACE("p[%d] swizzled\n", i); */
989 float* tt = (float*) p[i];
990 s[i].x = (isNegative) ? -tt[swizzle_x] : tt[swizzle_x];
991 s[i].y = (isNegative) ? -tt[swizzle_y] : tt[swizzle_y];
992 s[i].z = (isNegative) ? -tt[swizzle_z] : tt[swizzle_z];
993 s[i].w = (isNegative) ? -tt[swizzle_w] : tt[swizzle_w];
996 } else { /* output reg */
997 if ((pToken[i] & WINED3DSP_WRITEMASK_ALL) == WINED3DSP_WRITEMASK_ALL) {
1000 p_send[i] = &d; /* to be post-processed for modifiers management */
/* Call the opcode's software implementation with as many arguments as it declares. */
1006 switch (curOpcode->num_params) {
1008 curOpcode->soft_fct();
1011 curOpcode->soft_fct(p_send[0]);
1014 curOpcode->soft_fct(p_send[0], p_send[1]);
1017 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2]);
1020 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3]);
1023 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4]);
1026 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4], p_send[5]);
1029 ERR("%s too many params: %u\n", curOpcode->name, curOpcode->num_params);
1032 /* check if output reg modifier post-process */
1033 if (curOpcode->num_params > 0 &&
1034 (pToken[0] & WINED3DSP_WRITEMASK_ALL) != WINED3DSP_WRITEMASK_ALL) {
/* Copy back only the components selected by the destination write mask. */
1036 if (pToken[0] & WINED3DSP_WRITEMASK_0) p[0]->x = d.x;
1037 if (pToken[0] & WINED3DSP_WRITEMASK_1) p[0]->y = d.y;
1038 if (pToken[0] & WINED3DSP_WRITEMASK_2) p[0]->z = d.z;
1039 if (pToken[0] & WINED3DSP_WRITEMASK_3) p[0]->w = d.w;
1042 TRACE_VSVECTOR(output->oPos);
1043 TRACE_VSVECTOR(output->oD[0]);
1044 TRACE_VSVECTOR(output->oD[1]);
1045 TRACE_VSVECTOR(output->oT[0]);
1046 TRACE_VSVECTOR(output->oT[1]);
1047 TRACE_VSVECTOR(R[0]);
1048 TRACE_VSVECTOR(R[1]);
1049 TRACE_VSVECTOR(R[2]);
1050 TRACE_VSVECTOR(R[3]);
1051 TRACE_VSVECTOR(R[4]);
1052 TRACE_VSVECTOR(R[5]);
1055 /* to next opcode token */
1056 pToken += curOpcode->num_params;
1059 TRACE("End of current instruction:\n");
1060 TRACE_VSVECTOR(output->oPos);
1061 TRACE_VSVECTOR(output->oD[0]);
1062 TRACE_VSVECTOR(output->oD[1]);
1063 TRACE_VSVECTOR(output->oT[0]);
1064 TRACE_VSVECTOR(output->oT[1]);
1065 TRACE_VSVECTOR(R[0]);
1066 TRACE_VSVECTOR(R[1]);
1067 TRACE_VSVECTOR(R[2]);
1068 TRACE_VSVECTOR(R[3]);
1069 TRACE_VSVECTOR(R[4]);
1070 TRACE_VSVECTOR(R[5]);
1073 #if 0 /* Must not be 1 in cvs */
1075 TRACE_VSVECTOR(output->oPos);
1076 TRACE_VSVECTOR(output->oD[0]);
1077 TRACE_VSVECTOR(output->oD[1]);
1078 TRACE_VSVECTOR(output->oT[0]);
1079 TRACE_VSVECTOR(output->oT[1]);
1084 /* *******************************************
1085 IWineD3DVertexShader IUnknown parts follow
1086 ******************************************* */
/* IUnknown::QueryInterface for the vertex shader object.
 *
 * riid is compared against IID_IUnknown, IID_IWineD3DBase,
 * IID_IWineD3DBaseShader and IID_IWineD3DVertexShader; on a match a reference
 * is taken on iface. The visible fall-through return is E_NOINTERFACE.
 *
 * NOTE(review): the stores to *ppobj and the success return are not visible in
 * this excerpt - confirm against the full file.
 */
1087 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
1089 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1090 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
1091 if (IsEqualGUID(riid, &IID_IUnknown)
1092 || IsEqualGUID(riid, &IID_IWineD3DBase)
1093 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
1094 || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
1095 IUnknown_AddRef(iface);
1100 return E_NOINTERFACE;
/* IUnknown::AddRef: atomically increments the reference count.
 *
 * Returns the value produced by InterlockedIncrement(), i.e. the count after
 * the increment. The TRACE logs the count before it is incremented.
 */
1103 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
1104 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1105 TRACE("(%p) : AddRef increasing from %d\n", This, This->ref);
1106 return InterlockedIncrement(&This->ref);
/* IUnknown::Release: atomically decrements the reference count and tears the
 * shader down.
 *
 * Visible cleanup steps:
 *  - for a GLSL shader with a non-zero prgId, every program linked against
 *    this vertex shader is deleted through delete_glsl_program_entry(), then
 *    the GL shader object itself is deleted;
 *  - the float, bool and int local constant lists are freed;
 *  - the shader structure is released back to the process heap.
 *
 * NOTE(review): the check that the count reached zero and the return statement
 * are not visible in this excerpt - confirm that the cleanup only runs on the
 * final release.
 */
1109 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
1110 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1112 TRACE("(%p) : Releasing from %d\n", This, This->ref);
1113 ref = InterlockedDecrement(&This->ref);
1115 if (This->baseShader.shader_mode == SHADER_GLSL && This->baseShader.prgId != 0) {
1116 struct list *linked_programs = &This->baseShader.linked_programs;
1118 TRACE("Deleting linked programs\n");
/* A NULL next pointer is treated as "list never initialised" and skipped. */
1119 if (linked_programs->next) {
1120 struct glsl_shader_prog_link *entry, *entry2;
/* The _SAFE iterator is used because entries are deleted while iterating. */
1121 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
1122 delete_glsl_program_entry(This->baseShader.device, entry);
1126 TRACE("Deleting shader object %u\n", This->baseShader.prgId);
1127 GL_EXTCALL(glDeleteObjectARB(This->baseShader.prgId));
1128 checkGLcall("glDeleteObjectARB");
1130 shader_delete_constant_list(&This->baseShader.constantsF);
1131 shader_delete_constant_list(&This->baseShader.constantsB);
1132 shader_delete_constant_list(&This->baseShader.constantsI);
1133 HeapFree(GetProcessHeap(), 0, This);
1139 /* *******************************************
1140 IWineD3DVertexShader IWineD3DVertexShader parts follow
1141 ******************************************* */
/* IWineD3DBase::GetParent: stores This->parent in *parent and takes a
 * reference on it, so the caller receives a reference it has to release.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1143 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
1144 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1146 *parent = This->parent;
1147 IUnknown_AddRef(*parent);
1148 TRACE("(%p) : returning %p\n", This, *parent);
/* Returns the device this shader belongs to in *pDevice.
 *
 * A reference is added to the device before it is handed out, so the caller
 * receives a reference it has to release.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1152 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
1153 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1154 IWineD3DDevice_AddRef(This->baseShader.device);
1155 *pDevice = This->baseShader.device;
1156 TRACE("(%p) returning %p\n", This, *pDevice);
/* Copies the shader byte code into a caller supplied buffer.
 *
 * pData:       destination buffer, or NULL to query the required size only.
 * pSizeOfData: in: size of pData in bytes; out: required size when pData is
 *              NULL or when the buffer is too small.
 *
 * When *pSizeOfData is smaller than the stored function length it is updated
 * to the required length and WINED3DERR_MOREDATA is returned. Otherwise
 * functionLength bytes are copied from This->baseShader.function.
 *
 * NOTE(review): the other return statements and part of the branch structure
 * are not visible in this excerpt.
 */
1160 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
1161 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
1162 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
/* Size query: only report how many bytes are needed. */
1164 if (NULL == pData) {
1165 *pSizeOfData = This->baseShader.functionLength;
1168 if (*pSizeOfData < This->baseShader.functionLength) {
1169 *pSizeOfData = This->baseShader.functionLength;
1170 return WINED3DERR_MOREDATA;
1172 if (NULL == This->baseShader.function) { /* no function defined */
1173 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
/* NOTE(review): this treats pData as a pointer to a pointer and stores NULL
 * through it, unlike the memcpy path below which treats it as a byte buffer. */
1174 (*(DWORD **) pData) = NULL;
1176 if(This->baseShader.functionLength == 0){
1179 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
1180 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
1185 /* Note that for vertex shaders CompileShader isn't called until the
1186 * shader is first used. The reason for this is that we need the vertex
1187 * declaration the shader will be used with in order to determine if
1188 * the data in a register is of type D3DCOLOR, and needs swizzling. */
/* Attaches shader byte code to the shader object; no hardware program is
 * generated here.
 *
 * Visible steps, in order:
 *  1. trace the byte code (shader_trace_init) and set the shader limits;
 *  2. initialise the float / bool / int local constant lists;
 *  3. clear reg_maps and collect register and semantic usage with
 *     shader_get_registers_used(); a failure HRESULT is returned unchanged;
 *  4. record the device's selected vertex shader mode;
 *  5. keep a private heap copy of functionLength bytes of pFunction, or store
 *     NULL when pFunction is NULL. E_OUTOFMEMORY is returned if the copy
 *     cannot be allocated.
 *
 * NOTE(review): functionLength is not assigned in the visible lines -
 * presumably shader_trace_init() sets it; confirm. The final return statement
 * is not visible in this excerpt either.
 */
1189 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
1191 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
1192 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
1194 shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
1196 TRACE("(%p) : pFunction %p\n", iface, pFunction);
1198 /* First pass: trace shader */
1199 shader_trace_init((IWineD3DBaseShader*) This, pFunction);
1200 vshader_set_limits(This);
1202 /* Initialize immediate constant lists */
1203 list_init(&This->baseShader.constantsF);
1204 list_init(&This->baseShader.constantsB);
1205 list_init(&This->baseShader.constantsI);
1207 /* Second pass: figure out registers used, semantics, etc.. */
1208 memset(reg_maps, 0, sizeof(shader_reg_maps));
1209 hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
1210 This->semantics_in, This->semantics_out, pFunction, deviceImpl->stateBlock);
1211 if (hr != WINED3D_OK) return hr;
1213 This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
1215 /* copy the function ... because it will certainly be released by application */
1216 if (NULL != pFunction) {
1219 function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1220 if (!function) return E_OUTOFMEMORY;
1221 memcpy(function, pFunction, This->baseShader.functionLength);
1222 This->baseShader.function = function;
1224 This->baseShader.function = NULL;
1230 /* Preload semantics for d3d8 shaders */
/* Derives the shader's input semantics from a vertex declaration.
 *
 * For every element of the declaration except the last one, the element's
 * register, usage and usage index are registered as a shader input through
 * vshader_set_input().
 *
 * NOTE(review): the loop stops at declarationWNumElements - 1; presumably the
 * final element is an end marker - confirm. If declarationWNumElements is
 * unsigned and can be 0, the bound wraps around.
 */
1231 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
1232 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
1233 IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
1236 for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
1237 WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
1238 vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
1242 /* Set local constants for d3d8 shaders */
/* Adds a run of float constants to the shader's local constant list.
 *
 * start_idx: register index of the first constant.
 * src_data:  count vectors of 4 floats each; src_data[0..3] is the value for
 *            register start_idx, src_data[4..7] for start_idx + 1, and so on.
 * count:     number of 4-float vectors in src_data.
 *
 * Registers at or beyond GL_LIMITS(vshader_constantsF) are dropped with a
 * warning. Returns E_OUTOFMEMORY if a list entry cannot be allocated; entries
 * added before the failure stay on the list.
 *
 * NOTE(review): the declarations of i / end_idx, the store of the register
 * index into the entry and the success return are not visible in this excerpt.
 */
1243 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
1244 UINT start_idx, const float *src_data, UINT count) {
1245 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
1248 TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
1250 end_idx = start_idx + count;
1251 if (end_idx > GL_LIMITS(vshader_constantsF)) {
1252 WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
1253 end_idx = GL_LIMITS(vshader_constantsF);
1256 for (i = start_idx; i < end_idx; ++i) {
1257 local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
1258 if (!lconst) return E_OUTOFMEMORY;
/* i is a register index, while src_data is indexed relative to start_idx.
 * Indexing it with i directly read past the caller's data whenever
 * start_idx was not 0. */
1261 CopyMemory(lconst->value, src_data + (i - start_idx) * 4, 4 * sizeof(float));
1262 list_add_head(&This->baseShader.constantsF, &lconst->entry);
/* Generates the hardware program for the shader the first time it is needed.
 *
 * - Already compiled: returns WINED3D_OK immediately.
 * - No byte code, or the shader runs in software mode (SHADER_SW): nothing is
 *   generated; the shader is only marked as compiled.
 * - Otherwise the hardware program is generated from the stored byte code and
 *   register maps, and the shader is marked as compiled.
 *
 * NOTE(review): the return statements after the first one are not visible in
 * this excerpt, and no result of the generation step is checked in the
 * visible lines.
 */
1268 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
1269 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1270 CONST DWORD *function = This->baseShader.function;
1272 TRACE("(%p) : function %p\n", iface, function);
1274 /* We're already compiled. */
1275 if (This->baseShader.is_compiled) return WINED3D_OK;
1277 /* We don't need to compile */
1278 if (!function || This->baseShader.shader_mode == SHADER_SW) {
1279 This->baseShader.is_compiled = TRUE;
1283 /* Generate the HW shader */
1284 TRACE("(%p) : Generating hardware program\n", This);
1285 IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
1287 This->baseShader.is_compiled = TRUE;
/* Method table for the IWineD3DVertexShader implementation.
 *
 * The entries are positional initializers, so their order has to match the
 * member order of IWineD3DVertexShaderVtbl.
 * NOTE(review): that declaration is not visible here - confirm the order
 * against the interface header before adding or moving entries.
 */
1292 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
1294 /*** IUnknown methods ***/
1295 IWineD3DVertexShaderImpl_QueryInterface,
1296 IWineD3DVertexShaderImpl_AddRef,
1297 IWineD3DVertexShaderImpl_Release,
1298 /*** IWineD3DBase methods ***/
1299 IWineD3DVertexShaderImpl_GetParent,
1300 /*** IWineD3DBaseShader methods ***/
1301 IWineD3DVertexShaderImpl_SetFunction,
1302 IWineD3DVertexShaderImpl_CompileShader,
1303 /*** IWineD3DVertexShader methods ***/
1304 IWineD3DVertexShaderImpl_GetDevice,
1305 IWineD3DVertexShaderImpl_GetFunction,
1306 IWineD3DVertexShaderImpl_FakeSemantics,
1307 IWIneD3DVertexShaderImpl_SetLocalConstantsF