2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2005 Oliver Stieber
7 * Copyright 2006 Ivan Gyurdiev
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
29 #include "wined3d_private.h"
31 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
/* Shorthand for the gl_info member reached through This->wineD3DDevice->wineD3D.
 * The expansion refers to an identifier named `This`, so the macro can only be
 * used where such a variable is in scope. */
33 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
35 /* Shader debugging - Change the following line to enable debugging of software
/* Trace helpers for the software shader VM. Inside this #if block VSTRACE(A)
 * forwards its parenthesised argument list to TRACE, and TRACE_VSVECTOR(name)
 * prints the x/y/z/w members of `name`. The last line defines TRACE_VSVECTOR
 * as empty.
 * NOTE(review): the #else/#endif lines that separate the two variants are not
 * visible here - confirm against the full source. */
37 #if 0 /* Must not be 1 in cvs version */
38 # define VSTRACE(A) TRACE A
39 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
42 # define TRACE_VSVECTOR(name)
45 #if 1 /* FIXME : Needs sorting when vshader code moved in properly */
48 * DirectX9 SDK download
49 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
52 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
54 * Using Vertex Shaders
55 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
58 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
62 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
63 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
64 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
67 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
70 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
72 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
73 * http://developer.nvidia.com/view.asp?IO=vstovp
75 * NVIDIA: Memory Management with VAR
76 * http://developer.nvidia.com/view.asp?IO=var_memory_management
79 /* TODO: Vertex and Pixel shaders are almost identical; the only exception is the way that some of the data is looked up or the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders.
80 Because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs could be cross-compiled using a large body of shared code */
/* Sentinel value (the integer 1 cast to const char *) placed in the third column
 * of opcode-table rows, e.g. the ones under the "requires GLSL or software
 * shaders" heading. It is a marker, not a pointer to a real string. */
82 #define GLNAME_REQUIRE_GLSL ((const char *)1)
84 /*******************************
85 * vshader functions software VM
/* Software-VM handler listed in the D3DSIO_ADD ("add") row of the opcode table.
 * Only the trace of s0, s1 and d is visible here.
 * NOTE(review): the assignments to d are not shown in this view - confirm the
 * computation against the full source. */
88 static void vshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
93 VSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
94 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* dp3: computes the dot product of the x, y and z members of s0 and s1 (w is
 * ignored) and stores that single value in all four members of d, then traces
 * the operands and result. */
97 static void vshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
98 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
99 VSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
100 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* dp4: computes the four-component dot product of s0 and s1 and stores that
 * single value in all four members of d, then traces the operands and result. */
103 static void vshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
104 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
105 VSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
106 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* dst: the visible assignment sets d->y to s0->y * s1->y; the operands and
 * result are then traced.
 * NOTE(review): the assignments to d->x, d->z and d->w are not shown in this
 * view - confirm them against the full source. */
109 static void vshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
111 d->y = s0->y * s1->y;
114 VSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
115 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* expp: works from s0->w only. d->x receives 2 raised to floor(w) and d->y the
 * fractional remainder w - floor(w). tmp.f is then set to 2^w and the low eight
 * bits of tmp.d are cleared.
 * NOTE(review): the declaration of tmp (presumably a float/integer union) and
 * the assignments to d->z and d->w are not shown in this view - confirm. The
 * trace label below reads "exp", not "expp". */
118 static void vshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
124 tmp.f = floorf(s0->w);
125 d->x = powf(2.0f, tmp.f);
126 d->y = s0->w - tmp.f;
127 tmp.f = powf(2.0f, s0->w);
128 tmp.d &= 0xFFFFFF00U;
131 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
132 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* lit: d->y is s0->x clamped below at zero. d->z is powf(s0->y, s0->w) when both
 * s0->x and s0->y are strictly positive, otherwise 0.
 * NOTE(review): the assignments to d->x and d->w are not shown in this view -
 * confirm them against the full source. */
135 static void vshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
137 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
138 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
140 VSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
141 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* logp: takes the absolute value of s0->w and writes its base-2 logarithm
 * (logf(x) / logf(2)) to all four members of d. A zero input yields -HUGE_VAL
 * instead of calling logf. */
144 static void vshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
145 float tmp_f = fabsf(s0->w);
146 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
147 VSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
148 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* mad: component-wise multiply-add, d = s0 * s1 + s2 for each of x, y, z and w,
 * followed by a trace of all operands and the result. */
151 static void vshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
152 d->x = s0->x * s1->x + s2->x;
153 d->y = s0->y * s1->y + s2->y;
154 d->z = s0->z * s1->z + s2->z;
155 d->w = s0->w * s1->w + s2->w;
156 VSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
157 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
/* max: component-wise maximum of s0 and s1. The s0 value is taken when
 * s0 >= s1, otherwise the s1 value. */
160 static void vshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
161 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
162 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
163 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
164 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
165 VSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
166 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* min: component-wise minimum of s0 and s1. The s0 value is taken when
 * s0 < s1, otherwise the s1 value. */
169 static void vshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
170 d->x = (s0->x < s1->x) ? s0->x : s1->x;
171 d->y = (s0->y < s1->y) ? s0->y : s1->y;
172 d->z = (s0->z < s1->z) ? s0->z : s1->z;
173 d->w = (s0->w < s1->w) ? s0->w : s1->w;
174 VSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
175 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software-VM handler listed in the D3DSIO_MOV ("mov") row of the opcode table.
 * Only the trace of s0 and d is visible here.
 * NOTE(review): the assignments to d are not shown in this view - confirm the
 * copy against the full source. */
178 static void vshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
183 VSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
184 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* mul: component-wise product, d = s0 * s1 for each of x, y, z and w, followed
 * by a trace of the operands and the result. */
187 static void vshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
188 d->x = s0->x * s1->x;
189 d->y = s0->y * s1->y;
190 d->z = s0->z * s1->z;
191 d->w = s0->w * s1->w;
192 VSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
193 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: takes no operands; the only visible statement is the trace message. */
196 static void vshader_nop(void) {
197 /* NOPPPP ahhh too easy ;) */
198 VSTRACE(("executing nop\n"));
/* rcp: writes the reciprocal of s0->w to all four members of d. A zero input
 * yields HUGE_VAL instead of dividing. */
201 static void vshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
202 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
203 VSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
204 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* rsq: writes 1 / sqrt(|s0->w|) to all four members of d. A zero input yields
 * HUGE_VAL, and an input of exactly 1.0 returns 1.0 without calling sqrtf. */
207 static void vshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
208 float tmp_f = fabsf(s0->w);
209 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
210 VSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
211 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* sge: component-wise "set on greater or equal". Each member of d becomes 1.0
 * when the s0 member is >= the s1 member, otherwise 0.0. */
214 static void vshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
215 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
216 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
217 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
218 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
219 VSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
220 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* slt: component-wise "set on less than". Each member of d becomes 1.0 when the
 * s0 member is < the s1 member, otherwise 0.0. */
223 static void vshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
224 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
225 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
226 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
227 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
228 VSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
229 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* sub: component-wise difference, d = s0 - s1 for each of x, y, z and w,
 * followed by a trace of the operands and the result. */
232 static void vshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
233 d->x = s0->x - s1->x;
234 d->y = s0->y - s1->y;
235 d->z = s0->z - s1->z;
236 d->w = s0->w - s1->w;
237 VSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
238 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
242 * Version 1.1 specific
/* exp: writes 2 raised to the power s0->w to all four members of d. */
245 static void vshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
246 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
247 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
248 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* log: writes the base-2 logarithm of |s0->w| (logf(x) / logf(2)) to all four
 * members of d. A zero input yields -HUGE_VAL instead of calling logf. */
251 static void vshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
252 float tmp_f = fabsf(s0->w);
253 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
254 VSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
255 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* frc: d->x and d->y receive the fractional parts of s0->x and s0->y, computed
 * as value - floorf(value), so the result is non-negative for negative inputs.
 * NOTE(review): the assignments to d->z and d->w are not shown in this view -
 * confirm them against the full source. */
258 static void vshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
259 d->x = s0->x - floorf(s0->x);
260 d->y = s0->y - floorf(s0->y);
263 VSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
264 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Fixed-size FLOAT matrix types. The two digits in each name are the array
 * dimensions in declaration order, e.g. D3DMATRIX43 is FLOAT[4][3]. They are the
 * matrix parameter types of the vshader_m*x* handlers: m4x4 takes D3DMATRIX44,
 * m4x3 takes D3DMATRIX34, m3x4 takes D3DMATRIX43, m3x3 takes D3DMATRIX33 and
 * m3x2 takes D3DMATRIX23. */
267 typedef FLOAT D3DMATRIX44[4][4];
268 typedef FLOAT D3DMATRIX43[4][3];
269 typedef FLOAT D3DMATRIX34[3][4];
270 typedef FLOAT D3DMATRIX33[3][3];
271 typedef FLOAT D3DMATRIX23[2][3];
/* m4x4: multiplies the 4x4 matrix mat by the vector s0. Each member of d is the
 * four-component dot product of one row of mat (mat[0] for x through mat[3] for
 * w) with s0. Four trace lines then print each row with the matching s0 and d
 * members.
 * NOTE(review): the lines below that use mat1..mat4 belong to a disabled
 * alternative ("Buggy CODE"); mat1 exists only inside the commented-out
 * parameter, and the comment delimiters around those lines are not visible
 * in this view. */
273 static void vshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
275 * Buggy CODE: here only if cast not work for copy/paste
276 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
277 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
278 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
279 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
280 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
281 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
282 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
284 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
285 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
286 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
287 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
288 VSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
289 VSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
290 VSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
291 VSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
/* m4x3: mat has three rows of four floats (D3DMATRIX34). d->x, d->y and d->z are
 * the four-component dot products of rows 0, 1 and 2 of mat with s0.
 * NOTE(review): the assignment to d->w is not shown in this view - confirm it
 * against the full source. */
294 static void vshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
295 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
296 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
297 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
299 VSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
300 VSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
301 VSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
302 VSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
/* m3x4: mat has four rows of three floats (D3DMATRIX43). Each member of d is the
 * three-component dot product of one row of mat (row 0 for x through row 3 for
 * w) with the x, y and z members of s0; s0->w is used only in the trace. */
305 static void vshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
306 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
307 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
308 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
309 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
310 VSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
311 VSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
312 VSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
313 VSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
/* m3x3: d->x, d->y and d->z are the three-component dot products of rows 0, 1
 * and 2 of the 3x3 matrix mat with the x, y and z members of s0.
 * NOTE(review): the assignment to d->w is not shown in this view - confirm it
 * against the full source. */
316 static void vshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
317 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
318 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
319 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
321 VSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
322 VSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
323 VSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
324 VSTRACE(("executing m3x3(4): (%f) \n", d->w));
/* m3x2: mat has two rows of three floats (D3DMATRIX23). d->x and d->y are the
 * three-component dot products of rows 0 and 1 of mat with the x, y and z
 * members of s0.
 * NOTE(review): lines before and after these two assignments are not shown in
 * this view, so what happens to d->z and d->w is not visible - confirm. */
327 static void vshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
329 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
330 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
336 * Version 2.0 specific
/* lrp: component-wise linear interpolation, d = s0 * (s1 - s2) + s2. A weight
 * s0 of 0 gives s2 and a weight of 1 gives s1. No trace call is visible in
 * this handler. */
338 static void vshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
339 d->x = s0->x * (s1->x - s2->x) + s2->x;
340 d->y = s0->y * (s1->y - s2->y) + s2->y;
341 d->z = s0->z * (s1->z - s2->z) + s2->z;
342 d->w = s0->w * (s1->w - s2->w) + s2->w;
/* crs: writes the cross product of the x/y/z parts of s0 and s1 to d->x, d->y
 * and d->z. d->w is set to the fixed value 0.9f (see the inline comment). */
345 static void vshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
346 d->x = s0->y * s1->z - s0->z * s1->y;
347 d->y = s0->z * s1->x - s0->x * s1->z;
348 d->z = s0->x * s1->y - s0->y * s1->x;
349 d->w = 0.9f; /* w is undefined, so set it to something safeish */
351 VSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
352 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software-VM handler listed in the D3DSIO_ABS ("abs") row of the opcode table.
 * Only the trace of s0 and d is visible here.
 * NOTE(review): the assignments to d are not shown in this view - confirm the
 * computation against the full source. */
355 static void vshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
361 VSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
362 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software-VM handlers for which only the signature line is visible in this
 * view. Each one is referenced by name from the matching row of
 * IWineD3DVertexShaderImpl_shader_ins (def, call, callnz, loop, ret, endloop,
 * dcl, pow, sgn, nrm, the two sincos rows, rep, endrep, if, ifc, else, label,
 * endif, break, breakc, breakp, mova, defb, defi, setp, and the TEXLDL row).
 * NOTE(review): none of the bodies is shown here - confirm what each one does
 * against the full source before relying on it. */
367 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
368 static void vshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
372 static void vshader_call(WINED3DSHADERVECTOR* d) {
376 static void vshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
380 static void vshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
384 static void vshader_ret(void) {
388 static void vshader_endloop(void) {
392 static void vshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
396 static void vshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
400 static void vshader_sgn(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
404 static void vshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
408 static void vshader_sincos3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
412 static void vshader_sincos2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
416 static void vshader_rep(WINED3DSHADERVECTOR* d) {
420 static void vshader_endrep(void) {
424 static void vshader_if(WINED3DSHADERVECTOR* d) {
428 static void vshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
432 static void vshader_else(void) {
436 static void vshader_label(WINED3DSHADERVECTOR* d) {
440 static void vshader_endif(void) {
444 static void vshader_break(void) {
448 static void vshader_breakc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
452 static void vshader_breakp(WINED3DSHADERVECTOR* d) {
456 static void vshader_mova(WINED3DSHADERVECTOR* d) {
460 static void vshader_defb(WINED3DSHADERVECTOR* d) {
464 static void vshader_defi(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
468 static void vshader_setp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
472 static void vshader_texldl(WINED3DSHADERVECTOR* d) {
/* Forward declarations of the two ARB code generators, so that the opcode table
 * that follows can refer to them before their definitions later in the file. */
477 static void vshader_hw_map2gl(SHADER_OPCODE_ARG* arg);
478 static void vshader_hw_mnxn(SHADER_OPCODE_ARG* arg);
481 * log, exp, frc, m*x* seems to be macros ins ... to see
/* Opcode table for vertex shaders. Each row gives, in order: a D3DSIO_* opcode,
 * its D3D mnemonic, a third string column (an ARB-style mnemonic, NULL,
 * "undefined" or GLNAME_REQUIRE_GLSL), two small integers, the software-VM
 * function, an ARB generator (vshader_hw_map2gl, vshader_hw_mnxn, shader_hw_def
 * or NULL), a GLSL generator (shader_glsl_* or NULL) and two version values.
 * vshader_hw_map2gl reads the opcode, glname and num_params members of a row,
 * and vshader_hw_mnxn indexes this array directly with D3DSIO_DP3 / D3DSIO_DP4.
 * NOTE(review): SHADER_OPCODE is declared elsewhere - confirm the mapping of
 * columns to members. The final all-zero row has nine initialisers where the
 * other rows have ten, and the D3DSIO_TEXLDL row is named "texdl"; both look
 * like slips worth checking. */
483 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
486 {D3DSIO_NOP, "nop", "NOP", 0, 0, vshader_nop, vshader_hw_map2gl, NULL, 0, 0},
487 {D3DSIO_MOV, "mov", "MOV", 1, 2, vshader_mov, vshader_hw_map2gl, shader_glsl_mov, 0, 0},
488 {D3DSIO_ADD, "add", "ADD", 1, 3, vshader_add, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
489 {D3DSIO_SUB, "sub", "SUB", 1, 3, vshader_sub, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
490 {D3DSIO_MAD, "mad", "MAD", 1, 4, vshader_mad, vshader_hw_map2gl, shader_glsl_mad, 0, 0},
491 {D3DSIO_MUL, "mul", "MUL", 1, 3, vshader_mul, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
492 {D3DSIO_RCP, "rcp", "RCP", 1, 2, vshader_rcp, vshader_hw_map2gl, shader_glsl_rcp, 0, 0},
493 {D3DSIO_RSQ, "rsq", "RSQ", 1, 2, vshader_rsq, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
494 {D3DSIO_DP3, "dp3", "DP3", 1, 3, vshader_dp3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
495 {D3DSIO_DP4, "dp4", "DP4", 1, 3, vshader_dp4, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
496 {D3DSIO_MIN, "min", "MIN", 1, 3, vshader_min, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
497 {D3DSIO_MAX, "max", "MAX", 1, 3, vshader_max, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
498 {D3DSIO_SLT, "slt", "SLT", 1, 3, vshader_slt, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
499 {D3DSIO_SGE, "sge", "SGE", 1, 3, vshader_sge, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
500 {D3DSIO_ABS, "abs", "ABS", 1, 2, vshader_abs, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
501 {D3DSIO_EXP, "exp", "EX2", 1, 2, vshader_exp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
502 {D3DSIO_LOG, "log", "LG2", 1, 2, vshader_log, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
503 {D3DSIO_EXPP, "expp", "EXP", 1, 2, vshader_expp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
504 {D3DSIO_LOGP, "logp", "LOG", 1, 2, vshader_logp, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
505 {D3DSIO_LIT, "lit", "LIT", 1, 2, vshader_lit, vshader_hw_map2gl, shader_glsl_lit, 0, 0},
506 {D3DSIO_DST, "dst", "DST", 1, 3, vshader_dst, vshader_hw_map2gl, shader_glsl_dst, 0, 0},
507 {D3DSIO_LRP, "lrp", "LRP", 1, 4, vshader_lrp, NULL, shader_glsl_lrp, 0, 0},
508 {D3DSIO_FRC, "frc", "FRC", 1, 2, vshader_frc, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
509 {D3DSIO_POW, "pow", "POW", 1, 3, vshader_pow, NULL, shader_glsl_map2gl, 0, 0},
510 {D3DSIO_CRS, "crs", "XPS", 1, 3, vshader_crs, NULL, shader_glsl_map2gl, 0, 0},
511 /* TODO: sng can possibly be performed a s
514 {D3DSIO_SGN, "sgn", NULL, 1, 2, vshader_sgn, NULL, shader_glsl_map2gl, 0, 0},
515 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
518 MUL vec.xyz, vec, tmp;
519 but I think this is better because it accounts for w properly.
525 {D3DSIO_NRM, "nrm", NULL, 1, 2, vshader_nrm, NULL, shader_glsl_map2gl, 0, 0},
526 {D3DSIO_SINCOS, "sincos", NULL, 1, 4, vshader_sincos2, NULL, shader_glsl_sincos, D3DVS_VERSION(2,0), D3DVS_VERSION(2,0)},
527 {D3DSIO_SINCOS, "sincos", NULL, 1, 2, vshader_sincos3, NULL, shader_glsl_sincos, D3DVS_VERSION(3,0), -1},
530 {D3DSIO_M4x4, "m4x4", "undefined", 1, 3, vshader_m4x4, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
531 {D3DSIO_M4x3, "m4x3", "undefined", 1, 3, vshader_m4x3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
532 {D3DSIO_M3x4, "m3x4", "undefined", 1, 3, vshader_m3x4, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
533 {D3DSIO_M3x3, "m3x3", "undefined", 1, 3, vshader_m3x3, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
534 {D3DSIO_M3x2, "m3x2", "undefined", 1, 3, vshader_m3x2, vshader_hw_mnxn, shader_glsl_mnxn, 0, 0},
536 /* Declare registers */
537 {D3DSIO_DCL, "dcl", NULL, 0, 2, vshader_dcl, NULL, NULL, 0, 0},
539 /* Constant definitions */
540 {D3DSIO_DEF, "def", NULL, 1, 5, vshader_def, shader_hw_def, shader_glsl_def, 0, 0},
541 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 1, 2, vshader_defb, NULL, NULL, 0, 0},
542 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 1, 5, vshader_defi, NULL, NULL, 0, 0},
544 /* Flow control - requires GLSL or software shaders */
545 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 0, 1, vshader_rep, NULL, NULL, 0, 0},
546 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, 0, vshader_endrep, NULL, NULL, 0, 0},
547 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 0, 1, vshader_if, NULL, NULL, 0, 0},
548 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 0, 2, vshader_ifc, NULL, NULL, 0, 0},
549 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 0, 0, vshader_else, NULL, NULL, 0, 0},
550 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 0, 0, vshader_endif, NULL, NULL, 0, 0},
551 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 0, 0, vshader_break, NULL, NULL, 0, 0},
552 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 0, 2, vshader_breakc, NULL, NULL, 0, 0},
553 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 0, 1, vshader_breakp, NULL, NULL, 0, 0},
554 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 0, 1, vshader_call, NULL, NULL, 0, 0},
555 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 0, 2, vshader_callnz, NULL, NULL, 0, 0},
556 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 0, 2, vshader_loop, NULL, shader_glsl_loop, 0, 0},
557 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, 0, vshader_ret, NULL, NULL, 0, 0},
558 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, 0, vshader_endloop, NULL, shader_glsl_endloop, 0, 0},
559 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 0, 1, vshader_label, NULL, NULL, 0, 0},
561 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 1, 2, vshader_mova, NULL, shader_glsl_mov, 0, 0},
562 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 1, 3, vshader_setp, NULL, NULL, 0, 0},
563 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 1, 2, vshader_texldl, NULL, NULL, 0, 0},
564 {0, NULL, NULL, 0, 0, NULL, NULL, 0, 0}
/* Appends the destination write mask of `param` to hwLine. Nothing is added when
 * all four D3DSP_WRITEMASK bits are set; otherwise "x", "y", "z" and "w" are
 * appended, in that order, for each mask bit that is set. hwLine must already
 * hold a NUL-terminated string with room for the appended characters (strcat).
 * is_color is not used by any visible line.
 * NOTE(review): one line between the mask test and the first strcat is not
 * shown in this view (presumably the leading "." separator) - confirm. */
567 inline static void vshader_program_add_output_param_swizzle(const DWORD param, int is_color, char *hwLine) {
568 /** operand output */
569 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
571 if (param & D3DSP_WRITEMASK_0) { strcat(hwLine, "x"); }
572 if (param & D3DSP_WRITEMASK_1) { strcat(hwLine, "y"); }
573 if (param & D3DSP_WRITEMASK_2) { strcat(hwLine, "z"); }
574 if (param & D3DSP_WRITEMASK_3) { strcat(hwLine, "w"); }
/* Appends the source swizzle of `param` to hwLine. The swizzle field is
 * extracted with D3DVS_SWIZZLE_MASK / D3DVS_SWIZZLE_SHIFT and split into four
 * 2-bit selectors (x in bits 0-1 through w in bits 6-7). Each selector indexes a
 * four-character table, either "xyzw" or the colour-fix variant "zyxw", which
 * swaps x and z. The visible output forms are ".c" when all four selectors are
 * equal and ".cccc" otherwise; the swizzle is also compared against
 * D3DVS_NOSWIZZLE.
 * NOTE(review): the lines that choose between the two tables, the declaration
 * of tmpReg, and the condition guarding the first four-character sprintf are
 * not shown in this view - confirm the exact branching (it presumably depends
 * on is_color) against the full source. */
578 inline static void vshader_program_add_input_param_swizzle(const DWORD param, int is_color, char *hwLine) {
579 static const char swizzle_reg_chars_color_fix[] = "zyxw";
580 static const char swizzle_reg_chars[] = "xyzw";
581 const char* swizzle_regs = NULL;
585 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
586 DWORD swizzle_x = swizzle & 0x03;
587 DWORD swizzle_y = (swizzle >> 2) & 0x03;
588 DWORD swizzle_z = (swizzle >> 4) & 0x03;
589 DWORD swizzle_w = (swizzle >> 6) & 0x03;
592 swizzle_regs = swizzle_reg_chars_color_fix;
594 swizzle_regs = swizzle_reg_chars;
598 * swizzle bits fields:
601 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) { /* D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
603 sprintf(tmpReg, ".%c%c%c%c",
604 swizzle_regs[swizzle_x],
605 swizzle_regs[swizzle_y],
606 swizzle_regs[swizzle_z],
607 swizzle_regs[swizzle_w]);
608 strcat(hwLine, tmpReg);
612 if (swizzle_x == swizzle_y &&
613 swizzle_x == swizzle_z &&
614 swizzle_x == swizzle_w)
616 sprintf(tmpReg, ".%c", swizzle_regs[swizzle_x]);
617 strcat(hwLine, tmpReg);
619 sprintf(tmpReg, ".%c%c%c%c",
620 swizzle_regs[swizzle_x],
621 swizzle_regs[swizzle_y],
622 swizzle_regs[swizzle_z],
623 swizzle_regs[swizzle_w]);
624 strcat(hwLine, tmpReg);
/* Appends the ARB program text for one operand (`param`) to hwLine.
 * Visible steps: " -" is appended when the source modifier is D3DSPSM_NEG; a
 * register name is then appended according to the register type; finally the
 * output write mask or the input swizzle is appended through the two helper
 * functions. Register spellings produced by the visible lines:
 *   "R<n>"                          temporary register
 *   "vertex.attrib[<n>]"            vertex input
 *   "C<n>" / "C[<n>]" / "C[A0.x + <n>]"  float constant (named when
 *                                   arg->reg_maps->constantsF[reg] is set,
 *                                   relative when D3DVS_ADDRMODE_RELATIVE is set)
 *   "A<n>"                          address register (D3DSPR_ADDR)
 *   hwrastout_reg_names[<n>]        result.position / fogcoord / pointsize
 *   "result.color.primary|secondary"
 *   "result.texcoord[<n>]"          D3DSPR_TEXCRDOUT
 *   "unrecognized_register"         after a FIXME for an unknown type
 * The two arrayUsageMap tests compare reg with the registers recorded for the
 * DIFFUSE and SPECULAR usages.
 * NOTE(review): the switch statement, most case labels, the tmpReg declaration,
 * the statements controlled by the DIFFUSE/SPECULAR tests (presumably setting
 * is_color) and the is_input test that selects between the two swizzle helpers
 * are not shown in this view - confirm against the full source.
 * hwrastout_reg_names is indexed by reg with no visible bounds check. */
628 inline static void vshader_program_add_param(SHADER_OPCODE_ARG *arg, const DWORD param, BOOL is_input, char *hwLine) {
630 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)arg->shader;
632 /* oPos, oFog and oPts in D3D */
633 static const char* hwrastout_reg_names[] = { "result.position", "result.fogcoord", "result.pointsize" };
635 DWORD reg = param & D3DSP_REGNUM_MASK;
636 DWORD regtype = shader_get_regtype(param);
638 BOOL is_color = FALSE;
640 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) {
641 strcat(hwLine, " -");
648 sprintf(tmpReg, "R%lu", reg);
649 strcat(hwLine, tmpReg);
653 if (This->arrayUsageMap[WINED3DSHADERDECLUSAGE_DIFFUSE] &&
654 reg == (This->arrayUsageMap[WINED3DSHADERDECLUSAGE_DIFFUSE] & D3DSP_REGNUM_MASK))
657 if (This->arrayUsageMap[WINED3DSHADERDECLUSAGE_SPECULAR] &&
658 reg == (This->arrayUsageMap[WINED3DSHADERDECLUSAGE_SPECULAR] & D3DSP_REGNUM_MASK))
661 /* FIXME: Shaders in 8.1 appear to not require a dcl statement - use
662 * the reg value from the vertex declaration. However, arrayUsageMap is not initialized
663 * in that case - how can we know if an input contains color data or not? */
665 sprintf(tmpReg, "vertex.attrib[%lu]", reg);
666 strcat(hwLine, tmpReg);
669 /* FIXME: some constants are named so we need a constants map*/
670 if (arg->reg_maps->constantsF[reg]) {
671 if (param & D3DVS_ADDRMODE_RELATIVE) {
672 FIXME("Relative addressing not expected for a named constant %lu\n", reg);
674 sprintf(tmpReg, "C%lu", reg);
676 sprintf(tmpReg, "C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "A0.x + " : "", reg);
678 strcat(hwLine, tmpReg);
680 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
681 sprintf(tmpReg, "A%lu", reg);
682 strcat(hwLine, tmpReg);
685 sprintf(tmpReg, "%s", hwrastout_reg_names[reg]);
686 strcat(hwLine, tmpReg);
690 strcat(hwLine, "result.color.primary");
692 strcat(hwLine, "result.color.secondary");
695 case D3DSPR_TEXCRDOUT:
696 sprintf(tmpReg, "result.texcoord[%lu]", reg);
697 strcat(hwLine, tmpReg);
700 FIXME("Unknown reg type %ld %ld\n", regtype, reg);
701 strcat(hwLine, "unrecognized_register");
706 vshader_program_add_output_param_swizzle(param, is_color, hwLine);
708 vshader_program_add_input_param_swizzle(param, is_color, hwLine);
/* Fills This->baseShader.limits from This->baseShader.hex_version.
 * Set for every version: texcoord = 0, attributes = 16, packed_input = 0 and
 * constant_float = WINED3D_VSHADER_MAX_CONSTANTS.
 * Per-version values (temporary / constant_bool / constant_int / address /
 * packed_output / sampler):
 *   vs 1.0, 1.1 : 12 /  0 /  0 / 1 /  0 / 0
 *   vs 2.0, 2.1 : 12 / 16 / 16 / 1 /  0 / 0
 *   vs 3.0      : 32 / 32 / 32 / 1 / 12 / 4
 *   other       : same values as 2.x, plus a FIXME reporting the version
 * NOTE(review): the break statements between the cases are not shown in this
 * view - confirm that the cases do not fall through. */
712 static void vshader_set_limits(
713 IWineD3DVertexShaderImpl *This) {
715 This->baseShader.limits.texcoord = 0;
716 This->baseShader.limits.attributes = 16;
717 This->baseShader.limits.packed_input = 0;
719 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
720 This->baseShader.limits.constant_float = WINED3D_VSHADER_MAX_CONSTANTS;
722 switch (This->baseShader.hex_version) {
723 case D3DVS_VERSION(1,0):
724 case D3DVS_VERSION(1,1):
725 This->baseShader.limits.temporary = 12;
726 This->baseShader.limits.constant_bool = 0;
727 This->baseShader.limits.constant_int = 0;
728 This->baseShader.limits.address = 1;
729 This->baseShader.limits.packed_output = 0;
730 This->baseShader.limits.sampler = 0;
733 case D3DVS_VERSION(2,0):
734 case D3DVS_VERSION(2,1):
735 This->baseShader.limits.temporary = 12;
736 This->baseShader.limits.constant_bool = 16;
737 This->baseShader.limits.constant_int = 16;
738 This->baseShader.limits.address = 1;
739 This->baseShader.limits.packed_output = 0;
740 This->baseShader.limits.sampler = 0;
743 case D3DVS_VERSION(3,0):
744 This->baseShader.limits.temporary = 32;
745 This->baseShader.limits.constant_bool = 32;
746 This->baseShader.limits.constant_int = 32;
747 This->baseShader.limits.address = 1;
748 This->baseShader.limits.packed_output = 12;
749 This->baseShader.limits.sampler = 4;
752 default: This->baseShader.limits.temporary = 12;
753 This->baseShader.limits.constant_bool = 16;
754 This->baseShader.limits.constant_int = 16;
755 This->baseShader.limits.address = 1;
756 This->baseShader.limits.packed_output = 0;
757 This->baseShader.limits.sampler = 0;
758 FIXME("Unrecognized vertex shader version %#lx\n",
759 This->baseShader.hex_version);
763 /* Map the opcode 1-to-1 to the GL code */
/* Builds one ARB instruction in tmpLine and adds it to arg->buffer.
 * The mnemonic is the opcode's glname, except that a D3DSIO_MOV whose
 * destination register type is D3DSPR_ADDR is emitted as "ARL". When the opcode
 * has parameters, the destination operand is appended first and then each of
 * the remaining num_params - 1 sources, separated by ",", all formatted by
 * vshader_program_add_param. The finished line is written with a trailing ";".
 * The glname is copied with strcpy, so this must only be reached for table rows
 * whose glname is a real string.
 * NOTE(review): the declarations of tmpLine and i are not shown in this view,
 * so the size of tmpLine cannot be checked here. */
764 static void vshader_hw_map2gl(SHADER_OPCODE_ARG* arg) {
766 CONST SHADER_OPCODE* curOpcode = arg->opcode;
767 SHADER_BUFFER* buffer = arg->buffer;
768 DWORD dst = arg->dst;
769 DWORD* src = arg->src;
771 DWORD dst_regtype = shader_get_regtype(dst);
775 if (curOpcode->opcode == D3DSIO_MOV && dst_regtype == D3DSPR_ADDR)
776 strcpy(tmpLine, "ARL");
778 strcpy(tmpLine, curOpcode->glname);
780 if (curOpcode->num_params > 0) {
781 vshader_program_add_param(arg, dst, FALSE, tmpLine);
782 for (i = 1; i < curOpcode->num_params; ++i) {
783 strcat(tmpLine, ",");
784 vshader_program_add_param(arg, src[i-1], TRUE, tmpLine);
787 shader_addline(buffer, "%s;\n", tmpLine);
790 /** Handles transforming all D3DSIO_M?x? opcodes for
791 Vertex shaders to ARB_vertex_program codes */
/* Expands one matrix opcode into a sequence of dot-product instructions.
 * A temporary SHADER_OPCODE_ARG copies shader, buffer, src[0] and reg_maps from
 * arg, and its opcode is pointed at the DP4 or DP3 row of the opcode table.
 * For each i below nComponents, the destination keeps arg->dst with its write
 * mask replaced by the single bit D3DSP_WRITEMASK_0 << i, the second source is
 * arg->src[1] + i, and the instruction is emitted through vshader_hw_map2gl.
 * NOTE(review): the case labels and the assignments to nComponents are not
 * shown in this view, so which opcode maps to DP4 or DP3 and to how many
 * components cannot be confirmed here. The table is indexed with D3DSIO_DP4 /
 * D3DSIO_DP3 directly, which presumably relies on those rows sitting at the
 * index equal to the opcode value - confirm. */
792 static void vshader_hw_mnxn(SHADER_OPCODE_ARG* arg) {
796 SHADER_OPCODE_ARG tmpArg;
798 /* Set constants for the temporary argument */
799 tmpArg.shader = arg->shader;
800 tmpArg.buffer = arg->buffer;
801 tmpArg.src[0] = arg->src[0];
802 tmpArg.reg_maps = arg->reg_maps;
804 switch(arg->opcode->opcode) {
807 tmpArg.opcode = &IWineD3DVertexShaderImpl_shader_ins[D3DSIO_DP4];
811 tmpArg.opcode = &IWineD3DVertexShaderImpl_shader_ins[D3DSIO_DP4];
815 tmpArg.opcode = &IWineD3DVertexShaderImpl_shader_ins[D3DSIO_DP3];
819 tmpArg.opcode = &IWineD3DVertexShaderImpl_shader_ins[D3DSIO_DP3];
823 tmpArg.opcode = &IWineD3DVertexShaderImpl_shader_ins[D3DSIO_DP3];
829 for (i = 0; i < nComponents; i++) {
830 tmpArg.dst = ((arg->dst) & ~D3DSP_WRITEMASK_ALL)|(D3DSP_WRITEMASK_0<<i);
831 tmpArg.src[1] = arg->src[1]+i;
832 vshader_hw_map2gl(&tmpArg);
836 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
837 or GLSL and send it to the card */
/* Steps visible in this excerpt:
 *  1. shader_get_registers_used() fills a zeroed shader_reg_maps (input
 *     semantics come from This->arrayUsageMap, output semantics go to a local
 *     array).
 *  2. A SHADER_PGMSIZE-byte text buffer is allocated.
 *  3. Depending on wined3d_settings.shader_mode, either a GLSL shader object
 *     or an ARB vertex program is generated and handed to OpenGL; the
 *     resulting handle is stored in This->baseShader.prgId.
 *  4. The text buffer is freed.
 * NOTE(review): `®_maps` below looks like a mis-encoded `&reg_maps`; the
 * `#else`/`#endif` lines and several braces are also missing from this excerpt. */
838 inline static VOID IWineD3DVertexShaderImpl_GenerateShader(
839 IWineD3DVertexShader *iface,
840 CONST DWORD *pFunction) {
842 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
843 SHADER_BUFFER buffer;
845 /* First pass: figure out which registers are used, what the semantics are, etc.. */
846 shader_reg_maps reg_maps;
847 DWORD semantics_out[WINED3DSHADERDECLUSAGE_MAX_USAGE];
849 memset(®_maps, 0, sizeof(shader_reg_maps));
850 memset(semantics_out, 0, WINED3DSHADERDECLUSAGE_MAX_USAGE * sizeof(DWORD));
851 reg_maps.semantics_in = This->arrayUsageMap;
852 reg_maps.semantics_out = semantics_out;
853 shader_get_registers_used((IWineD3DBaseShader*) This, ®_maps, pFunction);
854 /* FIXME: validate against OpenGL */
/* Disabled alternative that would reuse a device-owned buffer.
 * NOTE(review): this dead branch mixes `This->device->fixupVertexBuffer...`
 * with `This->fixupVertexBuffer...` and uses `PGMSIZE` where the size test
 * uses `SHADER_PGMSIZE`; it would need fixing before being enabled. */
856 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
857 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
858 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
859 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
860 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
861 This->fixupVertexBufferSize = PGMSIZE;
862 This->fixupVertexBuffer[0] = 0;
864 buffer.buffer = This->device->fixupVertexBuffer;
/* Zero-initialised heap buffer that receives the generated shader text.
 * NOTE(review): no NULL check on the allocation is visible in this excerpt. */
866 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
871 if (wined3d_settings.shader_mode == SHADER_GLSL) {
873 /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
874 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
876 /* Base Declarations */
877 shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, ®_maps, &buffer);
879 /* Base Shader Body */
880 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, ®_maps, pFunction);
882 /* Unpack 3.0 outputs */
883 if (This->baseShader.hex_version >= D3DVS_VERSION(3,0))
884 vshader_glsl_output_unpack(&buffer, semantics_out);
/* Appends the closing brace of the generated GLSL source. */
886 shader_addline(&buffer, "}\n\0");
/* A NULL length array is passed, so GL treats the source as NUL-terminated. */
888 TRACE("Compiling shader object %u\n", shader_obj);
889 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
890 GL_EXTCALL(glCompileShaderARB(shader_obj));
891 print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
893 /* Store the shader object */
894 This->baseShader.prgId = shader_obj;
896 } else if (wined3d_settings.shader_mode == SHADER_ARB) {
898 /* Create the hw ARB shader */
899 shader_addline(&buffer, "!!ARBvp1.0\n");
901 /* Mesa supports only 95 constants */
902 if (GL_VEND(MESA) || GL_VEND(WINE))
903 This->baseShader.limits.constant_float =
904 min(95, This->baseShader.limits.constant_float);
906 /* Base Declarations */
907 shader_generate_arb_declarations( (IWineD3DBaseShader*) This, ®_maps, &buffer);
909 /* Base Shader Body */
910 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, ®_maps, pFunction);
912 shader_addline(&buffer, "END\n\0");
914 /* TODO: change to resource.glObjectHandle or something like that */
915 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
917 TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
918 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
920 TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
921 /* Create the program and check for errors */
922 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
923 buffer.bsize, buffer.buffer));
/* On GL_INVALID_OPERATION the error position and error string are logged and
 * prgId is set to -1 to mark the program as unusable. */
925 if (glGetError() == GL_INVALID_OPERATION) {
927 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
928 FIXME("HW VertexShader Error at position %d: %s\n",
929 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
930 This->baseShader.prgId = -1;
934 #if 1 /* if we're using the data buffer of the device then we don't need to free it */
935 HeapFree(GetProcessHeap(), 0, buffer.buffer);
/* Hardware-execution entry point taking the shader plus input and output
 * register sets. Only the signature and the interior of a TODO note are
 * visible here; the comment delimiters and the function body are missing
 * from this excerpt, so its behaviour cannot be described. */
939 BOOL IWineD3DVertexShaderImpl_ExecuteHAL(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
941 * TODO: use the NV_vertex_program (or 1_1) extension
942 * and specifics vendors (ARB_vertex_program??) variants for it
/* Software interpreter for a vertex shader. It walks the token stream in
 * This->baseShader.function and, for each instruction:
 *   - resolves every parameter token to a WINED3DSHADERVECTOR pointer (p[]),
 *   - for source parameters, applies swizzle/negation into a scratch copy (s[]),
 *   - for a partially masked destination, redirects the write to scratch `d`,
 *   - calls the opcode's soft_fct with the prepared pointers (p_send[]),
 *   - copies the masked components of `d` back into the real destination.
 * NOTE(review): many lines (declarations of `i` and `opcode_token`, case
 * labels, braces, return statements) are missing from this excerpt; the
 * comments below describe only what is visible. */
947 HRESULT WINAPI IWineD3DVertexShaderImpl_ExecuteSW(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
948 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
951 /** Vertex Shader Temporary Registers */
952 WINED3DSHADERVECTOR R[12];
953 /*D3DSHADERSCALAR A0;*/
954 WINED3DSHADERVECTOR A[1];
955 /** temporary Vector for modifier management */
956 WINED3DSHADERVECTOR d;
/* NOTE(review): s has 3 elements but is indexed below by the parameter index
 * i (sources start at i == 1), while the dispatch switch accepts up to 6
 * parameters; an instruction with three swizzled sources would touch s[3].
 * Verify against the opcode table. */
957 WINED3DSHADERVECTOR s[3];
959 const DWORD* pToken = This->baseShader.function;
960 const SHADER_OPCODE* curOpcode = NULL;
961 /** functions parameters */
962 WINED3DSHADERVECTOR* p[6];
963 WINED3DSHADERVECTOR* p_send[6];
966 /** init temporary register */
/* Only R is cleared here; no initialisation of A is visible in this excerpt. */
967 memset(R, 0, 12 * sizeof(WINED3DSHADERVECTOR));
969 /* vshader_program_parse(vshader); */
970 #if 0 /* Must not be 1 in cvs */
972 TRACE_VSVECTOR(This->data->C[0]);
973 TRACE_VSVECTOR(This->data->C[1]);
974 TRACE_VSVECTOR(This->data->C[2]);
975 TRACE_VSVECTOR(This->data->C[3]);
976 TRACE_VSVECTOR(This->data->C[4]);
977 TRACE_VSVECTOR(This->data->C[5]);
978 TRACE_VSVECTOR(This->data->C[6]);
979 TRACE_VSVECTOR(This->data->C[7]);
980 TRACE_VSVECTOR(This->data->C[8]);
981 TRACE_VSVECTOR(This->data->C[64]);
982 TRACE_VSVECTOR(input->V[D3DVSDE_POSITION]);
983 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDWEIGHT]);
984 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDINDICES]);
985 TRACE_VSVECTOR(input->V[D3DVSDE_NORMAL]);
986 TRACE_VSVECTOR(input->V[D3DVSDE_PSIZE]);
987 TRACE_VSVECTOR(input->V[D3DVSDE_DIFFUSE]);
988 TRACE_VSVECTOR(input->V[D3DVSDE_SPECULAR]);
989 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD0]);
990 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD1]);
/* NOTE(review): `vshader` is not declared in any visible line; this presumably
 * only compiles while TRACE_VSVECTOR expands to nothing - confirm. */
993 TRACE_VSVECTOR(vshader->data->C[64]);
994 /* TODO: Run through all the tokens and find any labels, if, endifs, loops etc...., and make a labels list */
996 /* the first dword is the version tag */
999 if (shader_is_vshader_version(*pToken)) { /** version */
/* Main interpreter loop: runs until the end token is reached. */
1002 while (D3DVS_END() != *pToken) {
1003 if (shader_is_comment(*pToken)) { /** comment */
/* The comment length is encoded in the comment token itself. */
1004 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1006 pToken += comment_len;
1010 opcode_token = *pToken++;
1011 curOpcode = shader_get_opcode((IWineD3DBaseShader*) This, opcode_token);
/* Unknown opcodes are reported and their parameter tokens skipped. */
1013 if (NULL == curOpcode) {
1014 FIXME("Unrecognized opcode: token=%08lX\n", opcode_token);
1015 pToken += shader_skip_unrecognized((IWineD3DBaseShader*) This, pToken);
1019 if (curOpcode->num_params > 0) {
1020 /* TRACE(">> execting opcode: pos=%d opcode_name=%s token=%08lX\n", pToken - vshader->function, curOpcode->name, *pToken); */
/* Resolve each parameter token to the register storage it names. */
1021 for (i = 0; i < curOpcode->num_params; ++i) {
1022 DWORD reg = pToken[i] & D3DSP_REGNUM_MASK;
1023 DWORD regtype = shader_get_regtype(pToken[i]);
1027 /* TRACE("p[%d]=R[%d]\n", i, reg); */
1031 /* TRACE("p[%d]=V[%s]\n", i, VertexShaderDeclRegister[reg]); */
1032 p[i] = &input->V[reg];
/* Constant register, optionally offset by the address register.
 * NOTE(review): no bounds check on the computed index is visible here. */
1035 if (pToken[i] & D3DVS_ADDRMODE_RELATIVE) {
1036 p[i] = &This->data->C[(DWORD) A[0].x + reg];
1038 p[i] = &This->data->C[reg];
1041 case D3DSPR_ADDR: /* case D3DSPR_TEXTURE: */
1043 ERR("cannot handle address registers != a0, forcing use of a0\n");
1046 /* TRACE("p[%d]=A[%d]\n", i, reg); */
1049 case D3DSPR_RASTOUT:
1051 case D3DSRO_POSITION:
1052 p[i] = &output->oPos;
1055 p[i] = &output->oFog;
1057 case D3DSRO_POINT_SIZE:
1058 p[i] = &output->oPts;
1062 case D3DSPR_ATTROUT:
1063 /* TRACE("p[%d]=oD[%d]\n", i, reg); */
1064 p[i] = &output->oD[reg];
1066 case D3DSPR_TEXCRDOUT:
1067 /* TRACE("p[%d]=oT[%d]\n", i, reg); */
1068 p[i] = &output->oT[reg];
1070 /* TODO Decls and defs */
/* Parameters after the first are sources: extract swizzle and negate modifier. */
1079 if (i > 0) { /* input reg */
1080 DWORD swizzle = (pToken[i] & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
1081 UINT isNegative = ((pToken[i] & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG);
1083 if (!isNegative && (D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) {
1084 /* TRACE("p[%d] not swizzled\n", i); */
/* Each 2-bit field of the swizzle selects the source component for x, y, z, w. */
1087 DWORD swizzle_x = swizzle & 0x03;
1088 DWORD swizzle_y = (swizzle >> 2) & 0x03;
1089 DWORD swizzle_z = (swizzle >> 4) & 0x03;
1090 DWORD swizzle_w = (swizzle >> 6) & 0x03;
1091 /* TRACE("p[%d] swizzled\n", i); */
/* The register is viewed as a float array so components can be picked by index. */
1092 float* tt = (float*) p[i];
1093 s[i].x = (isNegative) ? -tt[swizzle_x] : tt[swizzle_x];
1094 s[i].y = (isNegative) ? -tt[swizzle_y] : tt[swizzle_y];
1095 s[i].z = (isNegative) ? -tt[swizzle_z] : tt[swizzle_z];
1096 s[i].w = (isNegative) ? -tt[swizzle_w] : tt[swizzle_w];
1099 } else { /* output reg */
1100 if ((pToken[i] & D3DSP_WRITEMASK_ALL) == D3DSP_WRITEMASK_ALL) {
1103 p_send[i] = &d; /* to be post-processed for modifiers management */
/* Dispatch to the software handler with as many pointers as the opcode declares. */
1109 switch (curOpcode->num_params) {
1111 curOpcode->soft_fct();
1114 curOpcode->soft_fct(p_send[0]);
1117 curOpcode->soft_fct(p_send[0], p_send[1]);
1120 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2]);
1123 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3]);
1126 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4]);
1129 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4], p_send[5]);
1132 ERR("%s too many params: %u\n", curOpcode->name, curOpcode->num_params);
1135 /* check if output reg modifier post-process */
/* With a partial write mask only the enabled components are copied from the
 * scratch vector `d` into the real destination. */
1136 if (curOpcode->num_params > 0 && (pToken[0] & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1137 if (pToken[0] & D3DSP_WRITEMASK_0) p[0]->x = d.x;
1138 if (pToken[0] & D3DSP_WRITEMASK_1) p[0]->y = d.y;
1139 if (pToken[0] & D3DSP_WRITEMASK_2) p[0]->z = d.z;
1140 if (pToken[0] & D3DSP_WRITEMASK_3) p[0]->w = d.w;
1143 TRACE_VSVECTOR(output->oPos);
1144 TRACE_VSVECTOR(output->oD[0]);
1145 TRACE_VSVECTOR(output->oD[1]);
1146 TRACE_VSVECTOR(output->oT[0]);
1147 TRACE_VSVECTOR(output->oT[1]);
1148 TRACE_VSVECTOR(R[0]);
1149 TRACE_VSVECTOR(R[1]);
1150 TRACE_VSVECTOR(R[2]);
1151 TRACE_VSVECTOR(R[3]);
1152 TRACE_VSVECTOR(R[4]);
1153 TRACE_VSVECTOR(R[5]);
1156 /* to next opcode token */
1157 pToken += curOpcode->num_params;
1160 TRACE("End of current instruction:\n");
1161 TRACE_VSVECTOR(output->oPos);
1162 TRACE_VSVECTOR(output->oD[0]);
1163 TRACE_VSVECTOR(output->oD[1]);
1164 TRACE_VSVECTOR(output->oT[0]);
1165 TRACE_VSVECTOR(output->oT[1]);
1166 TRACE_VSVECTOR(R[0]);
1167 TRACE_VSVECTOR(R[1]);
1168 TRACE_VSVECTOR(R[2]);
1169 TRACE_VSVECTOR(R[3]);
1170 TRACE_VSVECTOR(R[4]);
1171 TRACE_VSVECTOR(R[5]);
1174 #if 0 /* Must not be 1 in cvs */
1176 TRACE_VSVECTOR(output->oPos);
1177 TRACE_VSVECTOR(output->oD[0]);
1178 TRACE_VSVECTOR(output->oD[1]);
1179 TRACE_VSVECTOR(output->oT[0]);
1180 TRACE_VSVECTOR(output->oT[1]);
/* Stub for setting float constants: the visible body only logs a FIXME and
 * does not read StartRegister, pConstantData or Vector4fCount. The return
 * statement is not visible in this excerpt. */
1185 HRESULT WINAPI IWineD3DVertexShaderImpl_SetConstantF(IWineD3DVertexShader *iface, UINT StartRegister, CONST FLOAT *pConstantData, UINT Vector4fCount) {
1186 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1187 FIXME("(%p) : stub\n", This);
/* Stub for reading float constants: the visible body only logs a FIXME and
 * never writes to pConstantData. The return statement is not visible in this
 * excerpt. */
1191 HRESULT WINAPI IWineD3DVertexShaderImpl_GetConstantF(IWineD3DVertexShader *iface, UINT StartRegister, FLOAT *pConstantData, UINT Vector4fCount) {
1192 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1193 FIXME("(%p) : stub\n", This);
/* Stub for setting integer constants. Arguments are validated (register range
 * and non-NULL data pointer, each failing with WINED3DERR_INVALIDCALL), then a
 * FIXME is logged; no constants are stored in the visible lines. The final
 * return statement is not visible in this excerpt. */
1197 HRESULT WINAPI IWineD3DVertexShaderImpl_SetConstantI(IWineD3DVertexShader *iface, UINT StartRegister, CONST int *pConstantData, UINT Vector4iCount) {
1198 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
/* NOTE(review): both operands are UINT, so the sum can wrap around and slip
 * past this range check for very large values. */
1199 if (StartRegister + Vector4iCount > WINED3D_VSHADER_MAX_CONSTANTS) {
1200 ERR("(%p) : SetVertexShaderConstantI C[%u] invalid\n", This, StartRegister);
1201 return WINED3DERR_INVALIDCALL;
1203 if (NULL == pConstantData) {
1204 return WINED3DERR_INVALIDCALL;
1206 FIXME("(%p) : stub\n", This);
/* Stub for reading integer constants. The call is traced, the register range
 * and the output pointer are validated (each failing with
 * WINED3DERR_INVALIDCALL), then a FIXME is logged; pConstantData is never
 * written in the visible lines. The final return statement is not visible in
 * this excerpt. */
1210 HRESULT WINAPI IWineD3DVertexShaderImpl_GetConstantI(IWineD3DVertexShader *iface, UINT StartRegister, int *pConstantData, UINT Vector4iCount) {
1211 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1212 TRACE("(%p) : C[%u] count=%u\n", This, StartRegister, Vector4iCount);
/* NOTE(review): UINT addition - the sum can wrap for very large arguments. */
1213 if (StartRegister + Vector4iCount > WINED3D_VSHADER_MAX_CONSTANTS) {
1214 return WINED3DERR_INVALIDCALL;
1216 if (NULL == pConstantData) {
1217 return WINED3DERR_INVALIDCALL;
1219 FIXME("(%p) : stub\n", This);
/* Stub for setting boolean constants. The register range and the data pointer
 * are validated (each failing with WINED3DERR_INVALIDCALL), then a FIXME is
 * logged; nothing is stored in the visible lines. The final return statement
 * is not visible in this excerpt. */
1223 HRESULT WINAPI IWineD3DVertexShaderImpl_SetConstantB(IWineD3DVertexShader *iface, UINT StartRegister, CONST BOOL *pConstantData, UINT BoolCount) {
1224 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
/* NOTE(review): UINT addition - the sum can wrap for very large arguments. */
1225 if (StartRegister + BoolCount > WINED3D_VSHADER_MAX_CONSTANTS) {
1226 ERR("(%p) : SetVertexShaderConstantB C[%u] invalid\n", This, StartRegister);
1227 return WINED3DERR_INVALIDCALL;
1229 if (NULL == pConstantData) {
1230 return WINED3DERR_INVALIDCALL;
1232 FIXME("(%p) : stub\n", This);
/* Stub for reading boolean constants: the visible body only logs a FIXME and
 * never writes to pConstantData. The return statement is not visible in this
 * excerpt. */
1236 HRESULT WINAPI IWineD3DVertexShaderImpl_GetConstantB(IWineD3DVertexShader *iface, UINT StartRegister, BOOL *pConstantData, UINT BoolCount) {
1237 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl *)iface;
1238 FIXME("(%p) : stub\n", This);
1244 /* *******************************************
1245 IWineD3DVertexShader IUnknown parts follow
1246 ******************************************* */
/* COM QueryInterface: accepts IID_IUnknown, IID_IWineD3DBase,
 * IID_IWineD3DBaseShader and IID_IWineD3DVertexShader, taking a reference on
 * the object for any of them; every other IID yields E_NOINTERFACE.
 * NOTE(review): the lines that store the pointer in *ppobj and return success
 * are not visible in this excerpt. */
1247 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
1249 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1250 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
1251 if (IsEqualGUID(riid, &IID_IUnknown)
1252 || IsEqualGUID(riid, &IID_IWineD3DBase)
1253 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
1254 || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
1255 IUnknown_AddRef(iface);
1260 return E_NOINTERFACE;
/* COM AddRef: traces the current count, then increments This->ref with
 * InterlockedIncrement and returns the new value. */
1263 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
1264 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1265 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
1266 return InterlockedIncrement(&This->ref);
/* COM Release: decrements This->ref with InterlockedDecrement. The cleanup
 * below releases the attached vertex declaration, deletes the GLSL shader
 * object when one was created, and frees the object itself.
 * NOTE(review): the declaration of `ref`, the condition guarding the cleanup
 * (presumably ref == 0) and the return statement are not visible in this
 * excerpt. */
1269 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
1270 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1272 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
1273 ref = InterlockedDecrement(&This->ref);
1275 if (This->vertexDeclaration) IWineD3DVertexDeclaration_Release(This->vertexDeclaration);
/* Only the GLSL path deletes a GL object here; no matching deletion of an ARB
 * program is visible in this excerpt. */
1276 if (wined3d_settings.shader_mode == SHADER_GLSL && This->baseShader.prgId != 0) {
1277 /* If this shader is still attached to a program, GL will perform a lazy delete */
1278 TRACE("Deleting shader object %u\n", This->baseShader.prgId);
1279 GL_EXTCALL(glDeleteObjectARB(This->baseShader.prgId));
1280 checkGLcall("glDeleteObjectARB");
1282 HeapFree(GetProcessHeap(), 0, This);
1287 /* *******************************************
1288 IWineD3DVertexShader IWineD3DVertexShader parts follow
1289 ******************************************* */
/* Stores This->parent in *parent and takes a reference on it, so the caller
 * receives an owned reference. The return statement is not visible in this
 * excerpt. */
1291 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
1292 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1294 *parent = This->parent;
1295 IUnknown_AddRef(*parent);
1296 TRACE("(%p) : returning %p\n", This, *parent);
/* Takes a reference on the owning device (This->wineD3DDevice) and stores it
 * in *pDevice. The return statement is not visible in this excerpt. */
1300 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
1301 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1302 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
1303 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
1304 TRACE("(%p) returning %p\n", This, *pDevice);
/* Copies the stored shader byte code to the caller.
 *   pData        destination buffer, or NULL to query the required size.
 *   pSizeOfData  in: size of pData; out: set to baseShader.functionLength
 *                when pData is NULL or the buffer is too small.
 * A too-small buffer yields WINED3DERR_MOREDATA.
 * NOTE(review): several branch/return lines are missing from this excerpt, so
 * the exact control flow between the checks below cannot be confirmed. */
1308 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
1309 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
1310 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
/* Size query: report the byte-code length. */
1312 if (NULL == pData) {
1313 *pSizeOfData = This->baseShader.functionLength;
1316 if (*pSizeOfData < This->baseShader.functionLength) {
1317 *pSizeOfData = This->baseShader.functionLength;
1318 return WINED3DERR_MOREDATA;
1320 if (NULL == This->baseShader.function) { /* no function defined */
1321 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
/* pData is treated as a pointer to a DWORD pointer here and receives NULL. */
1322 (*(DWORD **) pData) = NULL;
1324 if(This->baseShader.functionLength == 0){
1327 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
1328 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
/* Installs shader byte code on this object: traces/initialises it with
 * shader_trace_init(), sets the shader limits, generates the hardware shader
 * when hardware vertex shaders are enabled, and keeps a private heap copy of
 * the byte code (or NULL when pFunction is NULL).
 * NOTE(review): the `else` line and the return statement are not visible in
 * this excerpt. */
1333 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
1335 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
1337 shader_trace_init((IWineD3DBaseShader*) This, pFunction);
1338 vshader_set_limits(This);
1340 /* Generate HW shader if needed */
1341 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW)
1342 IWineD3DVertexShaderImpl_GenerateShader(iface, pFunction);
1344 /* copy the function ... because it will certainly be released by application */
1345 if (NULL != pFunction) {
/* The copy is functionLength bytes long; that field is presumably filled in by
 * shader_trace_init() above - confirm.
 * NOTE(review): the HeapAlloc result is passed straight to memcpy without a
 * NULL check. */
1346 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1347 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1349 This->baseShader.function = NULL;
/* Method table for the vertex shader object. Entries are positional, so their
 * order must match the IWineD3DVertexShaderVtbl declaration (declared outside
 * this excerpt). */
1354 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
1356 /*** IUnknown methods ***/
1357 IWineD3DVertexShaderImpl_QueryInterface,
1358 IWineD3DVertexShaderImpl_AddRef,
1359 IWineD3DVertexShaderImpl_Release,
1360 /*** IWineD3DBase methods ***/
1361 IWineD3DVertexShaderImpl_GetParent,
1362 /*** IWineD3DBaseShader methods ***/
1363 IWineD3DVertexShaderImpl_SetFunction,
1364 /*** IWineD3DVertexShader methods ***/
1365 IWineD3DVertexShaderImpl_GetDevice,
1366 IWineD3DVertexShaderImpl_GetFunction