2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
/* Expands to the gl_info member reached through This->wineD3DDevice->wineD3D,
 * so it can only be used where a local named `This` is in scope. */
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
/* Software-VM trace helpers. PSTRACE(A) expands to `TRACE A`, which is why the
 * call sites in this file pass a doubly parenthesised argument list. The
 * definitions directly under `#if 0` are compiled out. */
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
37 # define TRACE_VSVECTOR(name)
40 /* The maximum size of the program */
/* Mask that isolates the register number in a parameter token
 * (see get_register_name). */
43 #define REGMASK 0x00001FFF
/* Pointer type for the software opcode handlers; the empty parameter list
 * lets handlers of different arity share one table field. */
44 typedef void (*shader_fct_t)();
/* One row of the opcode table (pshader_ins).
 * NOTE(review): only num_params and soft_fct are visible in this view. */
46 typedef struct SHADER_OPCODE {
50 CONST UINT num_params;
51 shader_fct_t soft_fct;
/* Sentinel pointer value (address 1) stored in pshader_ins where a GL
 * instruction name string would otherwise go.
 * NOTE(review): presumably marks opcodes that can only be emitted as GLSL - confirm. */
56 #define GLNAME_REQUIRE_GLSL ((const char *)1)
57 /* *******************************************
58 IWineD3DPixelShader IUnknown parts follow
59 ******************************************* */
/* IUnknown::QueryInterface for the pixel shader object.
 * The visible condition accepts IID_IUnknown and IID_IWineD3DPixelShader and
 * takes a reference on iface for those.
 * NOTE(review): the store to *ppobj and both return paths are not visible in
 * this view - confirm against the full file. */
60 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
62 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
63 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
64 if (IsEqualGUID(riid, &IID_IUnknown)
65 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
66 IUnknown_AddRef(iface);
73 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
74 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
75 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
76 return InterlockedIncrement(&This->ref);
/* IUnknown::Release: atomically decrements This->ref and frees the object's
 * storage from the process heap.
 * NOTE(review): the declaration of ref, the guard around HeapFree and the
 * return statement are not visible in this view - presumably the free only
 * happens once the count reaches zero; confirm. */
79 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
80 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
82 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
83 ref = InterlockedDecrement(&This->ref);
85 HeapFree(GetProcessHeap(), 0, This);
90 /* TODO: At the moment the function parser is single pass; it achieves this
91 by passing constants to a couple of functions where they are then modified.
92 At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
93 when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
96 /* *******************************************
97 IWineD3DPixelShader IWineD3DPixelShader parts follow
98 ******************************************* */
/* Hands the shader's parent object back through *parent and takes a
 * reference on it (IUnknown_AddRef) on the caller's behalf.
 * NOTE(review): the return statement is not visible in this view. */
100 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
101 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
103 *parent = This->parent;
104 IUnknown_AddRef(*parent);
105 TRACE("(%p) : returning %p\n", This, *parent);
/* Returns the owning device through *pDevice. A reference is taken on the
 * device before the pointer is handed out.
 * NOTE(review): the return statement is not visible in this view. */
109 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
110 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
111 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
112 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
113 TRACE("(%p) returning %p\n", This, *pDevice);
/* Copies the stored shader function (This->function, This->functionLength
 * bytes) into the caller's buffer pData and reports sizes via *pSizeOfData.
 * Returns D3DERR_MOREDATA, after writing the required size, when the size
 * passed in is smaller than functionLength.
 * NOTE(review): several guards and the success return are not visible in
 * this view, so the exact conditions for each branch below need confirming. */
118 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
119 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
120 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
/* Size report. NOTE(review): presumably taken only for a size query (pData == NULL) - confirm. */
123 *pSizeOfData = This->functionLength;
/* Caller's buffer is too small: report the required size and bail out. */
126 if (*pSizeOfData < This->functionLength) {
127 *pSizeOfData = This->functionLength;
128 return D3DERR_MOREDATA;
130 if (NULL == This->function) { /* no function defined */
131 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
/* pData is treated as a pointer to a pointer slot here and that slot is cleared. */
132 (*(DWORD **) pData) = NULL;
134 if (This->functionLength == 0) {
137 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
138 memcpy(pData, This->function, This->functionLength);
143 /*******************************
144 * pshader functions software VM
147 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
148 d->x = s0->x + s1->x;
149 d->y = s0->y + s1->y;
150 d->z = s0->z + s1->z;
151 d->w = s0->w + s1->w;
152 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
153 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
156 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
157 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
158 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
159 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
162 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
163 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
164 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
165 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* dst: software handler for the dst opcode.
 * The visible statement sets d->y to the product of the two y components.
 * NOTE(review): the stores to d->x, d->z and d->w are not visible in this
 * view - confirm against the full file. */
168 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
170 d->y = s0->y * s1->y;
173 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
174 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* expp: partial-precision base-2 exponential of s0->w.
 * d->x receives 2^floor(w) and d->y the fractional part of w; 2^w is then
 * recomputed and its low 8 bits are cleared through the integer view tmp.d.
 * NOTE(review): the declaration of tmp (apparently a float/integer overlay)
 * and the stores to d->z and d->w are not visible in this view. The trace
 * below prints "exp", not "expp". */
177 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
183 tmp.f = floorf(s0->w);
184 d->x = powf(2.0f, tmp.f);
185 d->y = s0->w - tmp.f;
186 tmp.f = powf(2.0f, s0->w);
187 tmp.d &= 0xFFFFFF00U;
190 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
191 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* lit: lighting-coefficient opcode handler.
 * d->y is s0->x clamped below at zero; d->z is s0->y raised to the power
 * s0->w when both s0->x and s0->y are positive, otherwise zero.
 * NOTE(review): the stores to d->x and d->w are not visible in this view. */
194 void pshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
196 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
197 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
199 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
200 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
203 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
204 float tmp_f = fabsf(s0->w);
205 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
206 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
207 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
210 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
211 d->x = s0->x * s1->x + s2->x;
212 d->y = s0->y * s1->y + s2->y;
213 d->z = s0->z * s1->z + s2->z;
214 d->w = s0->w * s1->w + s2->w;
215 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
216 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
219 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
220 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
221 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
222 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
223 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
224 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
225 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
228 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
229 d->x = (s0->x < s1->x) ? s0->x : s1->x;
230 d->y = (s0->y < s1->y) ? s0->y : s1->y;
231 d->z = (s0->z < s1->z) ? s0->z : s1->z;
232 d->w = (s0->w < s1->w) ? s0->w : s1->w;
233 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
234 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* mov: software handler for the mov opcode; takes a destination and one
 * source vector.
 * NOTE(review): the statements that write d are not visible in this view -
 * presumably a component-wise copy of s0; confirm against the full file. */
237 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
242 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
243 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
246 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
247 d->x = s0->x * s1->x;
248 d->y = s0->y * s1->y;
249 d->z = s0->z * s1->z;
250 d->w = s0->w * s1->w;
251 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
252 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: no operands and no effect; only the optional trace is emitted. */
void pshader_nop(void) {
    PSTRACE(("executing nop\n"));
}
260 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
261 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
262 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
263 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
266 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
267 float tmp_f = fabsf(s0->w);
268 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
269 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
270 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
273 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
274 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
275 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
276 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
277 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
278 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
279 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
282 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
283 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
284 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
285 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
286 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
287 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
288 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
291 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
292 d->x = s0->x - s1->x;
293 d->y = s0->y - s1->y;
294 d->z = s0->z - s1->z;
295 d->w = s0->w - s1->w;
296 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
297 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
301 * Version 1.1 specific
304 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
305 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
306 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
307 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
310 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
311 float tmp_f = fabsf(s0->w);
312 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
313 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
314 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* frc: fractional part, computed as value minus floorf(value).
 * NOTE(review): only the x and y components are visible in this view; the
 * handling of d->z and d->w needs confirming against the full file. */
317 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
318 d->x = s0->x - floorf(s0->x);
319 d->y = s0->y - floorf(s0->y);
322 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
323 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Fixed-size FLOAT matrices used as the matrix operand of the m*x* handlers.
 * The two digits are the array dimensions in declaration order, e.g.
 * D3DMATRIX43 is FLOAT[4][3]. Note that pshader_m4x3 takes a D3DMATRIX34 and
 * pshader_m3x4 takes a D3DMATRIX43. */
326 typedef FLOAT D3DMATRIX44[4][4];
327 typedef FLOAT D3DMATRIX43[4][3];
328 typedef FLOAT D3DMATRIX34[3][4];
329 typedef FLOAT D3DMATRIX33[3][3];
330 typedef FLOAT D3DMATRIX23[2][3];
/* m4x4: multiplies the 4x4 matrix mat by the vector s0; component i of d is
 * the dot product of matrix row i with (s0.x, s0.y, s0.z, s0.w).
 * NOTE(review): the lines from "* Buggy CODE" down to the mat4 expressions
 * read like a commented-out older implementation (they use mat1, which only
 * exists inside the comment in the parameter list); the comment delimiters
 * are not visible in this view - confirm. */
332 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
334 * Buggy CODE: here only if cast not work for copy/paste
335 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
336 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
337 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
338 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
339 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
340 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
341 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
343 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
344 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
345 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
346 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
/* One trace line per matrix row, showing the row, the matching s0 component and the result. */
347 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
348 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
349 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
350 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
/* m4x3: multiplies a 3-row, 4-column matrix by the four-component vector s0;
 * d->x, d->y and d->z are the dot products of rows 0..2 with s0.
 * NOTE(review): the statement that sets d->w is not visible in this view. */
353 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
354 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
355 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
356 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
358 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
359 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
360 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
361 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
364 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
365 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
366 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
367 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
368 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
369 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
370 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
371 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
372 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
/* m3x3: multiplies a 3x3 matrix by the x, y, z components of s0; d->x, d->y
 * and d->z are the dot products of rows 0..2 with (s0.x, s0.y, s0.z).
 * NOTE(review): the statement that sets d->w is not visible in this view. */
375 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
376 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
377 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
378 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
380 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
381 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
382 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
383 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
/* m3x2: multiplies a 2-row, 3-column matrix by the x, y, z components of s0;
 * d->x and d->y are the dot products of rows 0 and 1 with (s0.x, s0.y, s0.z).
 * NOTE(review): lines before and after the two assignments are not visible
 * in this view, including whatever happens to d->z and d->w. */
386 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
388 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
389 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
395 * Version 2.0 specific
397 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
398 d->x = s0->x * (s1->x - s2->x) + s2->x;
399 d->y = s0->y * (s1->y - s2->y) + s2->y;
400 d->z = s0->z * (s1->z - s2->z) + s2->z;
401 d->w = s0->w * (s1->w - s2->w) + s2->w;
404 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
405 d->x = s0->y * s1->z - s0->z * s1->y;
406 d->y = s0->z * s1->x - s0->x * s1->z;
407 d->z = s0->x * s1->y - s0->y * s1->x;
408 d->w = 0.9f; /* w is undefined, so set it to something safeish */
410 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
411 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* abs: software handler for the abs opcode; takes a destination and one
 * source vector.
 * NOTE(review): the statements that write d are not visible in this view -
 * presumably a component-wise absolute value; confirm against the full file. */
414 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
419 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
420 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software-VM entry points for the texture-addressing and ps 1.x specific
 * opcodes. Each name below is referenced as the soft_fct of the matching
 * pshader_ins row.
 * NOTE(review): only the signatures are visible in this view; the bodies
 * (and whether they do any real work) need confirming against the full file. */
424 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
428 void pshader_texkill(WINED3DSHADERVECTOR* d) {
432 void pshader_tex(WINED3DSHADERVECTOR* d) {
435 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
439 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
443 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
447 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
451 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
455 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
459 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
463 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
467 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
471 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
475 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
479 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
483 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
487 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
488 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
492 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
496 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
500 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
504 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
508 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
512 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
516 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
520 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
/* Software-VM entry points for flow control, declarations and the later
 * shader-model opcodes. Each name below is referenced as the soft_fct of the
 * matching pshader_ins row.
 * NOTE(review): only the signatures are visible in this view; the bodies
 * need confirming against the full file. Several signatures take fewer
 * vectors than the num_params recorded in pshader_ins (e.g. pshader_else,
 * pshader_dp2add) - verify which side is authoritative. */
524 void pshader_call(WINED3DSHADERVECTOR* d) {
528 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
532 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
536 void pshader_ret(WINED3DSHADERVECTOR* d) {
540 void pshader_endloop(WINED3DSHADERVECTOR* d) {
544 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
548 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
552 void pshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
556 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
560 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
564 void pshader_rep(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
568 void pshader_endrep(void) {
572 void pshader_if(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
576 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
580 void pshader_else(WINED3DSHADERVECTOR* d) {
584 void pshader_label(WINED3DSHADERVECTOR* d) {
588 void pshader_endif(WINED3DSHADERVECTOR* d) {
592 void pshader_break(WINED3DSHADERVECTOR* d) {
596 void pshader_breakc(WINED3DSHADERVECTOR* d) {
600 void pshader_mova(WINED3DSHADERVECTOR* d) {
604 void pshader_defb(WINED3DSHADERVECTOR* d) {
608 void pshader_defi(WINED3DSHADERVECTOR* d) {
612 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
616 void pshader_dsx(WINED3DSHADERVECTOR* d) {
620 void pshader_dsy(WINED3DSHADERVECTOR* d) {
624 void pshader_texldd(WINED3DSHADERVECTOR* d) {
628 void pshader_setp(WINED3DSHADERVECTOR* d) {
632 void pshader_texldl(WINED3DSHADERVECTOR* d) {
636 void pshader_breakp(WINED3DSHADERVECTOR* d) {
640 * log, exp, frc, m*x* seems to be macros ins ... to see
/* Opcode table scanned linearly by pshader_program_get_opcode; the scan stops
 * at the final row, whose name is NULL.
 * Each row appears to hold, in order: the D3DSIO_* opcode, the D3D mnemonic,
 * the GL program mnemonic (or NULL / "undefined" / GLNAME_REQUIRE_GLSL), the
 * parameter count, the software handler, and the minimum and maximum
 * D3DPS_VERSION for which the row applies. A row with both versions 0 is
 * accepted for every shader version. The same opcode may appear more than
 * once with disjoint version ranges (e.g. D3DSIO_TEX as "tex" / "texld").
 * NOTE(review): the mnemonics "sng" (D3DSIO_SGN), "texm3x3vspe"
 * (D3DSIO_TEXM3x3VSPEC) and "texdl" (D3DSIO_TEXLDL) look like typos, and a
 * few parameter counts differ from the handler signatures (e.g. else/endif
 * listed with 2) - confirm before relying on them. */
642 static CONST SHADER_OPCODE pshader_ins [] = {
643 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
644 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
645 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
646 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
647 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
648 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
649 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
650 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
651 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
652 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
653 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
654 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
655 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
656 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
657 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
658 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
659 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
660 {D3DSIO_LIT, "lit", "LIT", 2, pshader_lit, 0, 0},
661 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
662 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
663 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
664 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
665 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
666 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
667 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
668 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
671 /** FIXME: use direct access so add the others opcodes as stubs */
672 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
673 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl or NV_shader's */
674 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
675 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
676 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
677 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
678 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
679 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
680 /* DCL is a special operation */
681 {D3DSIO_DCL, "dcl", NULL, 1, pshader_dcl, 0, 0},
682 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
683 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
684 /* TODO: sng can possibly be performed as
687 {D3DSIO_SGN, "sng", NULL, 2, pshader_sng, 0, 0},
688 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
691 MUL vec.xyz, vec, tmp;
692 but I think this is better because it accounts for w properly.
698 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
699 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
700 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 2, pshader_rep, 0, 0},
701 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
702 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 2, pshader_if, 0, 0},
703 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
704 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 2, pshader_else, 0, 0},
705 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 2, pshader_endif, 0, 0},
706 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 2, pshader_break, 0, 0},
707 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
708 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, pshader_mova, 0, 0},
709 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
710 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
712 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
714 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
715 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
716 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
717 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
718 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
720 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
721 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
722 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
723 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
724 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
725 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
726 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
727 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
728 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
729 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
730 /* def is a special operation */
731 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
732 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
733 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
734 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
735 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
736 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
737 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
738 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
739 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
740 /* TODO: dp2add can be made out of multiple instructions */
741 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
742 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
743 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
744 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
745 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
746 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
747 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 2, pshader_breakp, 0, 0},
748 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
749 {0, NULL, NULL, 0, NULL, 0, 0}
/* Looks up the pshader_ins row for an instruction token.
 *   code    - instruction token; only the bits under D3DSI_OPCODE_MASK are compared.
 *   version - shader version encoded as major * 10 + minor (it is split with
 *             /10 and %10 to build a D3DPS_VERSION value).
 * A row matches when its opcode equals the masked code and either the
 * version lies within [min_version, max_version] or both bounds are 0.
 * Returns a pointer to the first matching row; an unmatched opcode is
 * reported through FIXME.
 * NOTE(review): the declaration and increment of i and the return taken
 * after the FIXME are not visible in this view. */
753 inline static const SHADER_OPCODE* pshader_program_get_opcode(const DWORD code, const int version) {
755 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
756 /** TODO: use dichotomic search */
757 while (NULL != pshader_ins[i].name) {
758 if (((code & D3DSI_OPCODE_MASK) == pshader_ins[i].opcode) &&
759 (((hex_version >= pshader_ins[i].min_version) && (hex_version <= pshader_ins[i].max_version)) ||
760 ((pshader_ins[i].min_version == 0) && (pshader_ins[i].max_version == 0)))) {
761 return &pshader_ins[i];
765 FIXME("Unsupported opcode %lx(%ld) masked %lx version %d\n", code, code, code & D3DSI_OPCODE_MASK, version);
769 inline static BOOL pshader_is_version_token(DWORD token) {
770 return 0xFFFF0000 == (token & 0xFFFF0000);
773 inline static BOOL pshader_is_comment_token(DWORD token) {
774 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
/* Writes the textual name of the register encoded in param into regstr.
 * The register number is param & REGMASK and the register type comes from
 * the D3DSP_REGTYPE_MASK bits; the name format depends on the type
 * ("R%lu", "C%lu", "program.env[%lu]", "T%lu", "oD[%lu]", "oT[%lu]", the
 * fragment.color.* names, or an entry of rastout_reg_names). Unknown types
 * are reported through FIXME.
 * NOTE(review): most case labels and the conditions choosing between the
 * paired alternatives are not visible in this view. regstr is written with
 * sprintf/strcpy and no size, so the caller must supply a large enough
 * buffer; rastout_reg_names (5 entries) is indexed by reg with no bounds
 * check visible here - confirm. */
778 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
779 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
781 DWORD reg = param & REGMASK;
782 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
786 sprintf(regstr, "R%lu", reg);
790 strcpy(regstr, "fragment.color.primary");
792 strcpy(regstr, "fragment.color.secondary");
797 sprintf(regstr, "C%lu", reg);
799 sprintf(regstr, "program.env[%lu]", reg);
801 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
802 sprintf(regstr,"T%lu", reg);
805 sprintf(regstr, "%s", rastout_reg_names[reg]);
808 sprintf(regstr, "oD[%lu]", reg);
810 case D3DSPR_TEXCRDOUT:
811 sprintf(regstr, "oT[%lu]", reg);
814 FIXME("Unhandled register name Type(%ld)\n", regtype);
/* Appends the destination write mask of output_reg to write_mask as
 * ".<rgba subset>". Nothing is appended when all four D3DSP_WRITEMASK bits
 * are set. Letters are added in r, g, b, a order for mask bits 0..3.
 * NOTE(review): strcat is used throughout, so write_mask must already hold a
 * terminated string on entry; any initialisation done by this function is
 * not visible in this view. */
819 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
821 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
822 strcat(write_mask, ".");
823 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
824 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
825 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
826 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
/* Formats the source swizzle of instr into swzstring.
 * The swizzle field holds four 2-bit selectors (x in bits 0-1, y in 2-3,
 * z in 4-5, w in 6-7), each indexing "rgba". When the field differs from
 * D3DSP_NOSWIZZLE, either ".c" is written (all four selectors equal) or the
 * full ".cccc" form.
 * NOTE(review): the else between the two sprintf calls and what is written
 * for the no-swizzle case are not visible in this view. */
830 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
831 static const char swizzle_reg_chars[] = "rgba";
832 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
833 DWORD swizzle_x = swizzle & 0x03;
834 DWORD swizzle_y = (swizzle >> 2) & 0x03;
835 DWORD swizzle_z = (swizzle >> 4) & 0x03;
836 DWORD swizzle_w = (swizzle >> 6) & 0x03;
838 * swizzle bits fields:
842 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
843 if (swizzle_x == swizzle_y &&
844 swizzle_x == swizzle_z &&
845 swizzle_x == swizzle_w) {
846 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
848 sprintf(swzstring, ".%c%c%c%c",
849 swizzle_reg_chars[swizzle_x],
850 swizzle_reg_chars[swizzle_y],
851 swizzle_reg_chars[swizzle_z],
852 swizzle_reg_chars[swizzle_w]);
/* Appends line to the program text buffer pgm.
 *   lineNum   - running line counter, printed in the trace.
 *   pgm       - destination buffer of PGMSIZE bytes.
 *   pgmLength - current length of the text in pgm; advanced by strlen(line).
 *   line      - text to append.
 * If the new text would not leave room for a terminator within PGMSIZE an
 * error is logged instead.
 * NOTE(review): the statements following the ERR call, the write of the
 * terminator and the update of *lineNum are not visible in this view. The
 * error text says "vertex program" although this file builds pixel shader
 * programs - looks like a copy/paste leftover. */
857 inline static void addline(unsigned int *lineNum, char *pgm, unsigned int *pgmLength, char *line) {
858 int lineLen = strlen(line);
859 if(lineLen + *pgmLength > PGMSIZE - 1 /* - 1 to allow a NULL at the end */) {
860 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, lineLen + *pgmLength);
863 memcpy(pgm + *pgmLength, line, lineLen);
866 *pgmLength += lineLen;
868 TRACE("GL HW (%u, %u) : %s", *lineNum, *pgmLength, line);
/* Maps a destination shift value (0..15) to the name of the program constant
 * that gen_output_modifier_line multiplies the register by. Shifts 1..4 use
 * coefmul.x/y/z/w (x2..x16) and shifts 12..15 use coefdiv.w/z/y/x (d16..d2);
 * every other index maps to the placeholder "dummy". */
871 static const char* shift_tab[] = {
872 "dummy", /* 0 (none) */
873 "coefmul.x", /* 1 (x2) */
874 "coefmul.y", /* 2 (x4) */
875 "coefmul.z", /* 3 (x8) */
876 "coefmul.w", /* 4 (x16) */
877 "dummy", /* 5 (x32) */
878 "dummy", /* 6 (x64) */
879 "dummy", /* 7 (x128) */
880 "dummy", /* 8 (d256) */
881 "dummy", /* 9 (d128) */
882 "dummy", /* 10 (d64) */
883 "dummy", /* 11 (d32) */
884 "coefdiv.w", /* 12 (d16) */
885 "coefdiv.z", /* 13 (d8) */
886 "coefdiv.y", /* 14 (d4) */
887 "coefdiv.x" /* 15 (d2) */
890 inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
891 /* Generate a line that does the output modifier computation */
892 sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
895 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
896 /* Generate a line that does the input modifier computation and return the input register to use */
897 static char regstr[256];
898 static char tmpline[256];
901 /* Assume a new line will be added */
904 /* Get register name */
905 get_register_name(instr, regstr, constants);
907 TRACE(" Register name %s\n", regstr);
908 switch (instr & D3DSP_SRCMOD_MASK) {
910 strcpy(outregstr, regstr);
914 sprintf(outregstr, "-%s", regstr);
918 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
920 case D3DSPSM_BIASNEG:
921 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
924 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
926 case D3DSPSM_SIGNNEG:
927 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
930 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
933 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
936 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
939 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
940 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
941 strcat(line, "\n"); /* Hack */
942 strcat(line, tmpline);
945 sprintf(line, "RCP T%c, %s;", 'A' + tmpreg, regstr);
946 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
947 strcat(line, "\n"); /* Hack */
948 strcat(line, tmpline);
951 strcpy(outregstr, regstr);
956 /* Substitute the register name */
957 sprintf(outregstr, "T%c", 'A' + tmpreg);
962 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/*
 * Translate a Direct3D pixel shader token stream into ARB fragment program
 * text and submit that text to OpenGL.
 *
 * iface      the pixel shader object; its constants[] flags are cleared and then
 *            set for every constant defined by the shader, and prgId receives
 *            the generated GL program name
 * pFunction  the shader token stream, read until D3DPS_END(); may be NULL
 *
 * The text is accumulated with addline() in a PGMSIZE byte heap buffer that is
 * released again before returning.
 * NOTE(review): parts of this function are not visible in this listing; the
 * comments below describe the visible statements only.
 */
963 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
964 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
965 const DWORD *pToken = pFunction;
966 const SHADER_OPCODE *curOpcode = NULL;
969 unsigned lineNum = 0; /* The line number of the generated program (for logging)*/
970 char *pgmStr = NULL; /* A pointer to the program data generated by this function */
972 DWORD nUseAddressRegister = 0;
973 #if 0 /* TODO: loop register (just another address register ) */
974 BOOL hasLoops = FALSE;
977 BOOL saturate; /* clamp to 0.0 -> 1.0*/
978 int row = 0; /* not sure, something to do with macros? */
980 int version = 0; /* The version of the shader */
982 /* Keep a running length for pgmStr so that we don't have to calculate strlen every time we concatenate */
983 unsigned int pgmLength = 0;
985 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
986 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
987 if (This->device->fixupVertexBufferSize < PGMSIZE) {
988 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
989 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
990 This->fixupVertexBufferSize = PGMSIZE;
991 This->fixupVertexBuffer[0] = 0;
993 pgmStr = This->device->fixupVertexBuffer;
/* NOTE(review): no check of the HeapAlloc result is visible before pgmStr is written through addline() - confirm. */
995 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, PGMSIZE); /* 64kb should be enough */
999 /* TODO: Think about using a first pass to work out what's required for the second pass. */
/* Forget the constants flagged by a previous run; DEF instructions set the flags again below. */
1000 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
1001 This->constants[i] = 0;
1003 if (NULL != pToken) {
1004 while (D3DPS_END() != *pToken) {
1005 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1007 instructionSize = pToken & SIZEBITS >> 27;
/* Version token: remember the version and emit the program prologue. */
1010 if (pshader_is_version_token(*pToken)) { /** version */
1014 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1015 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1017 TRACE("found version token ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1019 /* Each release of pixel shaders has had different numbers of temp registers */
/* NOTE(review): the ARB_fragment_program header is "!!ARBfp1.0"; the "!!ARBfp2.0" and "!!ARBfp3.0"
 * headers emitted for ps 2.0 / 3.0 below are presumably rejected by the GL - confirm against the extension spec. */
1025 case 14: numTemps=12;
1027 strcpy(tmpLine, "!!ARBfp1.0\n");
1029 case 20: numTemps=12;
1031 strcpy(tmpLine, "!!ARBfp2.0\n");
1032 FIXME("No work done yet to support ps2.0 in hw\n");
1034 case 30: numTemps=32;
1036 strcpy(tmpLine, "!!ARBfp3.0\n");
1037 FIXME("No work done yet to support ps3.0 in hw\n");
1042 strcpy(tmpLine, "!!ARBfp1.0\n");
1043 FIXME("Unrecognized pixel shader version!\n");
1045 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Declare the temporaries T0-T5 and R0-R5 (the counts are hard coded to 6, numTemps is not used by these loops),
 * the helpers TMP and TMP2 and the modifier scratch registers TA, TB and TC. */
1047 /* TODO: find out how many registers are really needed */
1048 for(i = 0; i < 6; i++) {
1049 sprintf(tmpLine, "TEMP T%lu;\n", i);
1050 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1053 for(i = 0; i < 6; i++) {
1054 sprintf(tmpLine, "TEMP R%lu;\n", i);
1055 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1058 sprintf(tmpLine, "TEMP TMP;\n");
1059 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1060 sprintf(tmpLine, "TEMP TMP2;\n");
1061 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1062 sprintf(tmpLine, "TEMP TA;\n");
1063 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1064 sprintf(tmpLine, "TEMP TB;\n");
1065 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1066 sprintf(tmpLine, "TEMP TC;\n");
1067 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Constants referenced by the modifier code: halving/doubling factors and a vector of ones. */
1069 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1070 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1071 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1072 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1073 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1074 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Preload T0-T3 with the first four texture coordinate sets. */
1076 for(i = 0; i < 4; i++) {
1077 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1078 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Comment token: skip comment_len DWORDs of payload.
 * NOTE(review): the payload is printed with %s, which assumes it is NUL terminated text - confirm. */
1085 if (pshader_is_comment_token(*pToken)) { /** comment */
1086 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1088 FIXME("#%s\n", (char*)pToken);
1089 pToken += comment_len;
1093 #if 0 /* Not sure what these are here for, they're not required for vshaders */
/* NOTE(review): the TRACE below dereferences curOpcode before the NULL check that follows it -
 * confirm pshader_program_get_opcode() cannot return NULL here, or move the trace after the check. */
1097 curOpcode = pshader_program_get_opcode(*pToken, version);
1098 TRACE("Found opcode %s %s\n", curOpcode->name,curOpcode->glname);
1100 if (NULL == curOpcode) {
1101 /* unknown current opcode ... (shouldn't be any!) */
1102 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1103 FIXME("unrecognized opcode: %08lx\n", *pToken);
1106 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1107 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1108 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1109 pToken += curOpcode->num_params;
1113 /* Build opcode for GL vertex_program */
1114 switch (curOpcode->opcode) {
1119 /* Address registers must be loaded with the ARL instruction */
1120 if ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) == D3DSPR_ADDR) {
1121 if (((*pToken) & REGMASK) < nUseAddressRegister) {
1122 strcpy(tmpLine, "ARL");
1125 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This, ((*pToken) & REGMASK));
1150 case D3DSIO_TEXKILL:
1151 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1152 strcpy(tmpLine, curOpcode->glname);
/* Constant definition: the four DWORDs after the register token are read as floats and emitted as a
 * PARAM; the constant is flagged in This->constants[] (the flags are passed to get_register_name()). */
1156 DWORD reg = *pToken & REGMASK;
1157 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1158 *((const float *)(pToken + 1)),
1159 *((const float *)(pToken + 2)),
1160 *((const float *)(pToken + 3)),
1161 *((const float *)(pToken + 4)) );
1163 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1165 This->constants[reg] = 1;
/* Texture sample: for every version except 1.4 the register number selects coordinate, texture unit and
 * destination; for 1.4 the destination is R<reg1> and the sampler comes from the second token. */
1173 get_write_mask(*pToken, tmp);
1174 if (version != 14) {
1175 DWORD reg = *pToken & REGMASK;
1176 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg, tmp, reg, reg);
1177 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1181 DWORD reg1 = *pToken & REGMASK;
1182 DWORD reg2 = *++pToken & REGMASK;
1183 if (gen_input_modifier_line(*pToken, 0, reg, tmpLine, This->constants)) {
1184 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1186 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1, tmp, reg, reg2);
1187 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1193 case D3DSIO_TEXCOORD:
1196 get_write_mask(*pToken, tmp);
1197 if (version != 14) {
1198 DWORD reg = *pToken & REGMASK;
1199 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1200 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1203 DWORD reg1 = *pToken & REGMASK;
1204 DWORD reg2 = *++pToken & REGMASK;
1205 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1206 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* The TEXM3x2 and TEXM3x3 cases below accumulate one dot product per instruction into a component of TMP
 * and sample with TMP in the final instruction of the group. */
1212 case D3DSIO_TEXM3x2PAD:
1214 DWORD reg = *pToken & REGMASK;
1216 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1217 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1219 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1220 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1225 case D3DSIO_TEXM3x2TEX:
1227 DWORD reg = *pToken & REGMASK;
1229 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1230 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1232 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1233 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1234 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1235 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1240 case D3DSIO_TEXREG2AR:
1242 DWORD reg1 = *pToken & REGMASK;
1243 DWORD reg2 = *++pToken & REGMASK;
1244 sprintf(tmpLine, "MOV TMP.r, T%lu.a;\n", reg2);
1245 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1246 sprintf(tmpLine, "MOV TMP.g, T%lu.r;\n", reg2);
1247 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1248 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1249 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1254 case D3DSIO_TEXREG2GB:
1256 DWORD reg1 = *pToken & REGMASK;
1257 DWORD reg2 = *++pToken & REGMASK;
1258 sprintf(tmpLine, "MOV TMP.r, T%lu.g;\n", reg2);
1259 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1260 sprintf(tmpLine, "MOV TMP.g, T%lu.b;\n", reg2);
1261 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1262 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1263 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1270 DWORD reg1 = *pToken & REGMASK;
1271 DWORD reg2 = *++pToken & REGMASK;
1273 /* FIXME: Should apply the BUMPMAPENV matrix */
1274 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1275 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1276 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1277 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1282 case D3DSIO_TEXM3x3PAD:
1284 DWORD reg = *pToken & REGMASK;
1286 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1287 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* 'x'+row selects the TMP component written by this pad instruction. */
1289 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1290 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1296 case D3DSIO_TEXM3x3TEX:
1298 DWORD reg = *pToken & REGMASK;
1300 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1301 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1304 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1305 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1307 /* Cubemap textures will be more used than 3D ones. */
1308 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1309 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1314 case D3DSIO_TEXM3x3VSPEC:
1316 DWORD reg = *pToken & REGMASK;
1318 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1319 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1321 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1322 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1324 /* Construct the eye-ray vector from w coordinates */
1325 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1326 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1327 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1328 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1329 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1330 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1332 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1333 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;\n");
1334 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1335 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1336 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1337 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1338 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1340 /* Cubemap textures will be more used than 3D ones. */
1341 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1342 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1348 case D3DSIO_TEXM3x3SPEC:
1350 DWORD reg = *pToken & REGMASK;
1351 DWORD reg3 = *(pToken + 2) & REGMASK;
1353 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants)) {
1354 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1356 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1357 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1359 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
/* NOTE(review): the constant is written as C[n] here while the DEF handling above declares "PARAM Cn" - confirm the naming matches. */
1360 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1361 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1363 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1364 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1365 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1366 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1368 /* Cubemap textures will be more used than 3D ones. */
1369 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1370 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1378 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1379 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1381 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1383 pToken += curOpcode->num_params; /* maybe + 1 */
/* Destination modifier bits of the first parameter; only saturation is acted upon. */
1387 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1388 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1390 case D3DSPDM_SATURATE: saturate = TRUE; break;
1391 #if 0 /* as yet unhandled modifiers */
1392 case D3DSPDM_CENTROID: centroid = TRUE; break;
1393 case D3DSPDM_PP: partialpresision = TRUE; break;
1394 case D3DSPDM_X2: X2 = TRUE; break;
1395 case D3DSPDM_X4: X4 = TRUE; break;
1396 case D3DSPDM_X8: X8 = TRUE; break;
1397 case D3DSPDM_D2: D2 = TRUE; break;
1398 case D3DSPDM_D4: D4 = TRUE; break;
1399 case D3DSPDM_D8: D8 = TRUE; break;
1402 TRACE("_unhandled_modifier(0x%08lx)", mask);
/* Operand generation: operands[0] is the destination (name plus write mask), operands[1..] are the
 * sources (name or scratch register from gen_input_modifier_line() plus swizzle). */
1406 /* Generate input and output registers */
1407 if (curOpcode->num_params > 0) {
1409 char operands[4][100];
1413 TRACE("(%p): Opcode has %d params\n", This, curOpcode->num_params);
1415 /* Generate lines that handle input modifier computation */
1416 for (i = 1; i < curOpcode->num_params; ++i) {
1417 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1418 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpLine, This->constants)) {
1419 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1423 /* Handle saturation only when no shift is present in the output modifier */
1424 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
1427 /* Handle output register */
1428 get_register_name(*pToken, tmpOp, This->constants);
1429 strcpy(operands[0], tmpOp);
1430 get_write_mask(*pToken, tmpOp);
1431 strcat(operands[0], tmpOp);
1433 /* This function works because of side effects from gen_input_modifier_line */
1434 /* Handle input registers */
1435 for (i = 1; i < curOpcode->num_params; ++i) {
1436 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1437 strcpy(operands[i], regs[i - 1]);
1438 get_input_register_swizzle(*(pToken + i), swzstring);
1439 strcat(operands[i], swzstring);
/* Opcodes without a one-to-one ARB equivalent are emitted by hand; note that the first CMP below swaps
 * its last two source operands. */
1442 switch(curOpcode->opcode) {
1444 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
/* NOTE(review): unlike the other generated instructions this ADD has no trailing "\n". */
1447 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
1448 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1449 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
1453 strcat(tmpLine, "_SAT");
1454 strcat(tmpLine, " ");
1455 strcat(tmpLine, operands[0]);
1456 for (i = 1; i < curOpcode->num_params; i++) {
1457 strcat(tmpLine, ", ");
1458 strcat(tmpLine, operands[i]);
1460 strcat(tmpLine,";\n");
1462 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1463 pToken += curOpcode->num_params;
1465 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1466 if (curOpcode->num_params > 0) {
1467 DWORD param = *(pInstr + 1);
1468 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1470 /* Generate a line that handle the output modifier computation */
1472 char write_mask[20];
1473 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1474 get_register_name(param, regstr, This->constants);
1475 get_write_mask(param, write_mask);
1476 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
1477 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Program epilogue: R0 is copied to the colour output and the program is closed. */
1483 /* TODO: What about result.depth? */
1484 strcpy(tmpLine, "MOV result.color, R0;\n");
1485 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1487 strcpy(tmpLine, "END\n");
1488 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1491 /* finally null terminate the pgmStr*/
1492 pgmStr[pgmLength] = 0;
/* NOTE(review): the support check names ARB_VERTEX_PROGRAM although the program created below is bound
 * and loaded as GL_FRAGMENT_PROGRAM_ARB - presumably the fragment program extension should be tested; confirm. */
1493 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1494 TRACE("(%p) : Generated program %s\n", This, pgmStr);
1495 /* Create the hw shader */
1497 /* TODO: change to resource.glObjectHandel or something like that */
1498 GL_EXTCALL(glGenProgramsARB(1, &This->prgId));
1500 TRACE("Creating a hw pixel shader, prg=%d\n", This->prgId);
1501 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->prgId));
1503 TRACE("Created hw pixel shader, prg=%d\n", This->prgId);
1504 /* Create the program and check for errors */
1505 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
/* A rejected program is only reported (error position and GL error string); no failure is signalled by the visible code. */
1506 if (glGetError() == GL_INVALID_OPERATION) {
1508 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1509 FIXME("HW PixelShader Error at position: %d\n%s\n", errPos, glGetString(GL_PROGRAM_ERROR_STRING_ARB));
/* The text buffer is released here; the visible code keeps only the GL program name in This->prgId. */
1513 #if 1 /* if were using the data buffer of device then we don't need to free it */
1514 HeapFree(GetProcessHeap(), 0, pgmStr);
/*
 * Trace a human readable form of one pixel shader parameter token.
 *
 * param  the parameter token
 * input  non-zero for a source parameter (negate/complement prefix, source
 *        modifier suffix and swizzle are handled), zero for a destination
 *        parameter (the write mask is printed)
 *
 * The function only produces TRACE output.
 */
1518 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1519 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1520 static const char swizzle_reg_chars[] = "rgba";
1522 /* the unknown mask is for bits not yet accounted for by any other mask... */
1523 #define UNKNOWN_MASK 0xC000
1525 /* for registers above 7 we have to add on bits 11 and 12 to get the correct register */
1526 #define EXTENDED_REG 0x1800
/* The register type is assembled from the regular type field plus the two extended bits (bits 11 and 12 moved down to bits 3 and 4). */
1528 DWORD reg = param & D3DSP_REGNUM_MASK;
1529 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
/* Source modifiers that negate or complement the operand are tested first. */
1532 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1533 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1534 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1535 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1537 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
/* Register name: a prefix chosen by register type followed by the register number. */
1541 switch (regtype /* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1549 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1552 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
/* NOTE(review): reg indexes the five entry rastout_reg_names table without a visible range check - confirm reg < 5 for this register type. */
1555 case D3DSPR_RASTOUT:
1556 TRACE("%s", rastout_reg_names[reg]);
1558 case D3DSPR_ATTROUT:
1559 TRACE("oD%lu", reg);
1561 case D3DSPR_TEXCRDOUT:
1562 TRACE("oT%lu", reg);
1564 case D3DSPR_CONSTINT:
1565 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1567 case D3DSPR_CONSTBOOL:
1568 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1574 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1581 /** operand output */
1583 * for better debugging traces it's done into opcode dump code
1584 * @see pshader_program_dump_opcode
1585 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1586 DWORD mask = param & D3DSP_DSTMOD_MASK;
1588 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1590 TRACE("_unhandled_modifier(0x%08lx)", mask);
1593 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1594 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1596 TRACE("_x%u", 1 << shift);
/* Partial write mask: every enabled component is traced as its own ".r" / ".g" / ".b" / ".a" piece. */
1600 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1602 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1603 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1604 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1605 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1608 /** operand input */
1609 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1610 DWORD swizzle_r = swizzle & 0x03;
1611 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1612 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1613 DWORD swizzle_a = (swizzle >> 6) & 0x03;
/* Source modifier suffix; the negating variants print the same suffix as their positive counterparts. */
1615 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1616 DWORD mask = param & D3DSP_SRCMOD_MASK;
1617 /*TRACE("_modifier(0x%08lx) ", mask);*/
1619 case D3DSPSM_NONE: break;
1620 case D3DSPSM_NEG: break;
1621 case D3DSPSM_BIAS: TRACE("_bias"); break;
1622 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1623 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1624 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1625 case D3DSPSM_COMP: break;
1626 case D3DSPSM_X2: TRACE("_x2"); break;
1627 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1628 case D3DSPSM_DZ: TRACE("_dz"); break;
1629 case D3DSPSM_DW: TRACE("_dw"); break;
1631 TRACE("_unknown(0x%08lx)", mask);
1636 * swizzle bits fields:
/* A swizzle is printed only when it differs from the identity; a selector repeated four times is shortened to one letter. */
1639 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1640 if (swizzle_r == swizzle_g &&
1641 swizzle_r == swizzle_b &&
1642 swizzle_r == swizzle_a) {
1643 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1646 swizzle_reg_chars[swizzle_r],
1647 swizzle_reg_chars[swizzle_g],
1648 swizzle_reg_chars[swizzle_b],
1649 swizzle_reg_chars[swizzle_a]);
/*
 * Attach a shader function (token stream) to the pixel shader object.
 *
 * iface      the pixel shader object
 * pFunction  the shader token stream, read until D3DPS_END(); may be NULL
 *
 * The stream is walked once to trace a disassembly and to work out its length
 * (stored in This->functionLength), the hardware program is generated when the
 * settings ask for it, and a private copy of the tokens is stored in
 * This->function (NULL when pFunction is NULL).
 * NOTE(review): parts of this function are not visible in this listing; the
 * comments describe the visible statements only.
 */
1655 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1656 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1657 const DWORD* pToken = pFunction;
1658 const SHADER_OPCODE *curOpcode = NULL;
1662 TRACE("(%p) : Parsing programme\n", This);
1664 if (NULL != pToken) {
1665 while (D3DPS_END() != *pToken) {
/* Version token: major * 10 + minor, e.g. 14 for ps 1.4. */
1666 if (pshader_is_version_token(*pToken)) { /** version */
1667 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1668 TRACE("ps_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
/* Comment token: its payload is skipped and counted in len together with the token itself.
 * NOTE(review): the payload is printed with %s, which assumes it is NUL terminated text - confirm. */
1673 if (pshader_is_comment_token(*pToken)) { /** comment */
1674 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1676 TRACE("//%s\n", (char*)pToken);
1677 pToken += comment_len;
1678 len += comment_len + 1;
1682 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1684 curOpcode = pshader_program_get_opcode(*pToken, version);
1687 if (NULL == curOpcode) {
1689 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1690 while (*pToken & 0x80000000) {
1692 /* unknown current opcode ... */
1693 TRACE("unrecognized opcode: %08lx", *pToken);
/* DCL: the low 16 bits of the token select the usage name, bits 16-19 the usage index. */
1700 if (curOpcode->opcode == D3DSIO_DCL) {
1702 switch(*pToken & 0xFFFF) {
1703 case D3DDECLUSAGE_POSITION:
1704 TRACE("%s%ld ", "position",(*pToken & 0xF0000) >> 16);
1706 case D3DDECLUSAGE_BLENDINDICES:
1707 TRACE("%s ", "blend");
1709 case D3DDECLUSAGE_BLENDWEIGHT:
1710 TRACE("%s ", "weight");
1712 case D3DDECLUSAGE_NORMAL:
1713 TRACE("%s%ld ", "normal",(*pToken & 0xF0000) >> 16);
1715 case D3DDECLUSAGE_PSIZE:
1716 TRACE("%s ", "psize");
1718 case D3DDECLUSAGE_COLOR:
1719 if((*pToken & 0xF0000) >> 16 == 0) {
1720 TRACE("%s ", "color");
1722 TRACE("%s%ld ", "specular", ((*pToken & 0xF0000) >> 16) - 1);
1725 case D3DDECLUSAGE_TEXCOORD:
1726 TRACE("%s%ld ", "texture", (*pToken & 0xF0000) >> 16);
1728 case D3DDECLUSAGE_TANGENT:
1729 TRACE("%s ", "tangent");
1731 case D3DDECLUSAGE_BINORMAL:
1732 TRACE("%s ", "binormal");
1734 case D3DDECLUSAGE_TESSFACTOR:
1735 TRACE("%s ", "tessfactor");
1737 case D3DDECLUSAGE_POSITIONT:
1738 TRACE("%s%ld ", "positionT",(*pToken & 0xF0000) >> 16);
1740 case D3DDECLUSAGE_FOG:
1741 TRACE("%s ", "fog");
1743 case D3DDECLUSAGE_DEPTH:
1744 TRACE("%s ", "depth");
1746 case D3DDECLUSAGE_SAMPLE:
1747 TRACE("%s ", "sample");
1750 FIXME("Unrecognised dcl %08lx", *pToken & 0xFFFF);
1754 pshader_program_dump_ps_param(*pToken, 0);
/* DEF: the constant register number followed by four DWORDs that are printed as floats. */
1758 if (curOpcode->opcode == D3DSIO_DEF) {
1759 TRACE("def c%lu = ", *pToken & 0xFF);
1762 TRACE("%f ,", *(float *)pToken);
1765 TRACE("%f ,", *(float *)pToken);
1768 TRACE("%f ,", *(float *)pToken);
1771 TRACE("%f", *(float *)pToken);
/* Any other opcode: its name, then the destination parameter (input == 0), then the source parameters (input == 1). */
1775 TRACE("%s ", curOpcode->name);
1776 if (curOpcode->num_params > 0) {
1777 pshader_program_dump_ps_param(*pToken, 0);
1780 for (i = 1; i < curOpcode->num_params; ++i) {
1782 pshader_program_dump_ps_param(*pToken, 1);
/* Length in bytes: the counted tokens plus one more DWORD (presumably the end token - confirm). */
1791 This->functionLength = (len + 1) * sizeof(DWORD);
1793 This->functionLength = 1; /* no Function defined use fixed function vertex processing */
/* NOTE(review): hardware generation of this pixel shader is gated on the vertex shader setting
 * (vs_mode == VS_HW) - confirm whether a pixel shader specific setting should be tested instead. */
1796 /* Generate HW shader if needed */
1797 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1798 TRACE("(%p) : Generating hardware program\n", This);
1800 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1804 TRACE("(%p) : Copying the function\n", This);
1805 /* copy the function ... because it will certainly be released by application */
/* NOTE(review): the HeapAlloc result is handed straight to memcpy; a failed allocation would be written through - consider checking for NULL. */
1806 if (NULL != pFunction) {
1807 This->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->functionLength);
1808 memcpy((void *)This->function, pFunction, This->functionLength);
1810 This->function = NULL;
1813 /* TODO: Some proper return values for failures */
1814 TRACE("(%p) : Returning D3D_OK\n", This);
/*
 * Function table of the IWineD3DPixelShader implementation: the IUnknown
 * methods first, then the IWineD3DPixelShader methods, and finally SetFunction,
 * which is a wined3d internal entry that is not part of d3d.
 */
1818 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1820 /*** IUnknown methods ***/
1821 IWineD3DPixelShaderImpl_QueryInterface,
1822 IWineD3DPixelShaderImpl_AddRef,
1823 IWineD3DPixelShaderImpl_Release,
1824 /*** IWineD3DPixelShader methods ***/
1825 IWineD3DPixelShaderImpl_GetParent,
1826 IWineD3DPixelShaderImpl_GetDevice,
1827 IWineD3DPixelShaderImpl_GetFunction,
1828 /* not part of d3d */
1829 IWineD3DPixelShaderImpl_SetFunction