2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
37 # define TRACE_VSVECTOR(name)
40 /* The maximum size of the program */
43 #define REGMASK 0x00001FFF
44 typedef void (*shader_fct_t)();
46 typedef struct SHADER_OPCODE {
50 CONST UINT num_params;
51 shader_fct_t soft_fct;
56 #define GLNAME_REQUIRE_GLSL ((const char *)1)
57 /* *******************************************
58 IWineD3DPixelShader IUnknown parts follow
59 ******************************************* */
60 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
62 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
63 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
64 if (IsEqualGUID(riid, &IID_IUnknown)
65 || IsEqualGUID(riid, &IID_IWineD3DBase)
66 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
67 IUnknown_AddRef(iface);
74 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
75 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
76 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
77 return InterlockedIncrement(&This->ref);
80 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
81 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
83 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
84 ref = InterlockedDecrement(&This->ref);
86 HeapFree(GetProcessHeap(), 0, This);
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
   when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
97 /* *******************************************
98 IWineD3DPixelShader IWineD3DPixelShader parts follow
99 ******************************************* */
101 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
102 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
104 *parent = This->parent;
105 IUnknown_AddRef(*parent);
106 TRACE("(%p) : returning %p\n", This, *parent);
110 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
111 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
112 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
113 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
114 TRACE("(%p) returning %p\n", This, *pDevice);
119 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
120 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
121 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
124 *pSizeOfData = This->functionLength;
127 if (*pSizeOfData < This->functionLength) {
128 *pSizeOfData = This->functionLength;
129 return D3DERR_MOREDATA;
131 if (NULL == This->function) { /* no function defined */
132 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
133 (*(DWORD **) pData) = NULL;
135 if (This->functionLength == 0) {
138 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
139 memcpy(pData, This->function, This->functionLength);
144 /*******************************
145 * pshader functions software VM
148 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
149 d->x = s0->x + s1->x;
150 d->y = s0->y + s1->y;
151 d->z = s0->z + s1->z;
152 d->w = s0->w + s1->w;
153 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
154 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
157 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
158 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
159 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
160 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
163 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
164 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
165 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
166 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
169 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
171 d->y = s0->y * s1->y;
174 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
175 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
178 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
184 tmp.f = floorf(s0->w);
185 d->x = powf(2.0f, tmp.f);
186 d->y = s0->w - tmp.f;
187 tmp.f = powf(2.0f, s0->w);
188 tmp.d &= 0xFFFFFF00U;
191 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
192 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
195 void pshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
197 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
198 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
200 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
201 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
204 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
205 float tmp_f = fabsf(s0->w);
206 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
207 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
208 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
211 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
212 d->x = s0->x * s1->x + s2->x;
213 d->y = s0->y * s1->y + s2->y;
214 d->z = s0->z * s1->z + s2->z;
215 d->w = s0->w * s1->w + s2->w;
216 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
217 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
220 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
221 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
222 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
223 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
224 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
225 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
226 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
229 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
230 d->x = (s0->x < s1->x) ? s0->x : s1->x;
231 d->y = (s0->y < s1->y) ? s0->y : s1->y;
232 d->z = (s0->z < s1->z) ? s0->z : s1->z;
233 d->w = (s0->w < s1->w) ? s0->w : s1->w;
234 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
235 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
238 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
243 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
244 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
247 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
248 d->x = s0->x * s1->x;
249 d->y = s0->y * s1->y;
250 d->z = s0->z * s1->z;
251 d->w = s0->w * s1->w;
252 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
253 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/*
 * nop: does nothing apart from the optional trace.
 */
void pshader_nop(void) {
    /* NOPPPP ahhh too easy ;) */
    PSTRACE(("executing nop\n"));
}
261 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
262 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
263 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
264 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
267 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
268 float tmp_f = fabsf(s0->w);
269 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
270 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
271 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
274 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
275 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
276 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
277 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
278 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
279 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
280 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
283 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
284 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
285 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
286 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
287 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
288 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
289 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
292 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
293 d->x = s0->x - s1->x;
294 d->y = s0->y - s1->y;
295 d->z = s0->z - s1->z;
296 d->w = s0->w - s1->w;
297 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
298 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
302 * Version 1.1 specific
305 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
306 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
307 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
308 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
311 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
312 float tmp_f = fabsf(s0->w);
313 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
314 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
315 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
318 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
319 d->x = s0->x - floorf(s0->x);
320 d->y = s0->y - floorf(s0->y);
323 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
324 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
327 typedef FLOAT D3DMATRIX44[4][4];
328 typedef FLOAT D3DMATRIX43[4][3];
329 typedef FLOAT D3DMATRIX34[3][4];
330 typedef FLOAT D3DMATRIX33[3][3];
331 typedef FLOAT D3DMATRIX23[2][3];
333 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
335 * Buggy CODE: here only if cast not work for copy/paste
336 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
337 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
338 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
339 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
340 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
341 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
342 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
344 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
345 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
346 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
347 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
348 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
349 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
350 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
351 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
354 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
355 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
356 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
357 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
359 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
360 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
361 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
362 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
365 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
366 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
367 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
368 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
369 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
370 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
371 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
372 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
373 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
376 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
377 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
378 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
379 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
381 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
382 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
383 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
384 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
387 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
389 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
390 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
396 * Version 2.0 specific
398 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
399 d->x = s0->x * (s1->x - s2->x) + s2->x;
400 d->y = s0->y * (s1->y - s2->y) + s2->y;
401 d->z = s0->z * (s1->z - s2->z) + s2->z;
402 d->w = s0->w * (s1->w - s2->w) + s2->w;
405 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
406 d->x = s0->y * s1->z - s0->z * s1->y;
407 d->y = s0->z * s1->x - s0->x * s1->z;
408 d->z = s0->x * s1->y - s0->y * s1->x;
409 d->w = 0.9f; /* w is undefined, so set it to something safeish */
411 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
412 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
415 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
420 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
421 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
425 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
429 void pshader_texkill(WINED3DSHADERVECTOR* d) {
433 void pshader_tex(WINED3DSHADERVECTOR* d) {
436 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
440 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
444 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
448 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
452 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
456 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
460 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
464 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
468 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
472 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
476 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
480 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
484 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
488 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
489 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
493 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
497 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
501 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
505 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
509 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
513 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
517 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
521 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
525 void pshader_call(WINED3DSHADERVECTOR* d) {
529 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
533 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
537 void pshader_ret(WINED3DSHADERVECTOR* d) {
541 void pshader_endloop(WINED3DSHADERVECTOR* d) {
545 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
549 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
553 void pshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
557 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
561 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
565 void pshader_rep(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
569 void pshader_endrep(void) {
573 void pshader_if(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
577 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
581 void pshader_else(WINED3DSHADERVECTOR* d) {
585 void pshader_label(WINED3DSHADERVECTOR* d) {
589 void pshader_endif(WINED3DSHADERVECTOR* d) {
593 void pshader_break(WINED3DSHADERVECTOR* d) {
597 void pshader_breakc(WINED3DSHADERVECTOR* d) {
601 void pshader_mova(WINED3DSHADERVECTOR* d) {
605 void pshader_defb(WINED3DSHADERVECTOR* d) {
609 void pshader_defi(WINED3DSHADERVECTOR* d) {
613 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
617 void pshader_dsx(WINED3DSHADERVECTOR* d) {
621 void pshader_dsy(WINED3DSHADERVECTOR* d) {
625 void pshader_texldd(WINED3DSHADERVECTOR* d) {
629 void pshader_setp(WINED3DSHADERVECTOR* d) {
633 void pshader_texldl(WINED3DSHADERVECTOR* d) {
637 void pshader_breakp(WINED3DSHADERVECTOR* d) {
641 * log, exp, frc, m*x* seems to be macros ins ... to see
643 static CONST SHADER_OPCODE pshader_ins [] = {
644 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
645 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
646 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
647 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
648 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
649 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
650 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
651 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
652 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
653 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
654 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
655 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
656 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
657 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
658 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
659 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
660 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
661 {D3DSIO_LIT, "lit", "LIT", 2, pshader_lit, 0, 0},
662 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
663 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
664 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
665 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
666 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
667 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
668 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
669 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
672 /** FIXME: use direct access so add the others opcodes as stubs */
673 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
674 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl ol NV_shader's */
675 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
676 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
677 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
678 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
679 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
680 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
681 /* DCL is a specil operation */
682 {D3DSIO_DCL, "dcl", NULL, 1, pshader_dcl, 0, 0},
683 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
684 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
685 /* TODO: sng can possibly be performed as
688 {D3DSIO_SGN, "sng", NULL, 2, pshader_sng, 0, 0},
689 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
692 MUL vec.xyz, vec, tmp;
693 but I think this is better because it accounts for w properly.
699 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
700 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
701 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 2, pshader_rep, 0, 0},
702 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
703 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 2, pshader_if, 0, 0},
704 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
705 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 2, pshader_else, 0, 0},
706 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 2, pshader_endif, 0, 0},
707 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 2, pshader_break, 0, 0},
708 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
709 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, pshader_mova, 0, 0},
710 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
711 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
713 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
715 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
716 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
717 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
718 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
720 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
721 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
722 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
723 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
724 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
725 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
726 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
728 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
729 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
730 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
731 /* def is a special operation */
732 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
733 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
734 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
735 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
736 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
737 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
738 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
739 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
740 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
741 /* TODO: dp2add can be made out of multiple instuctions */
742 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
743 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
744 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
745 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
746 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
747 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
748 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 2, pshader_breakp, 0, 0},
749 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
750 {0, NULL, NULL, 0, NULL, 0, 0}
754 inline static const SHADER_OPCODE* pshader_program_get_opcode(const DWORD code, const int version) {
756 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
757 /** TODO: use dichotomic search */
758 while (NULL != pshader_ins[i].name) {
759 if (((code & D3DSI_OPCODE_MASK) == pshader_ins[i].opcode) &&
760 (((hex_version >= pshader_ins[i].min_version) && (hex_version <= pshader_ins[i].max_version)) ||
761 ((pshader_ins[i].min_version == 0) && (pshader_ins[i].max_version == 0)))) {
762 return &pshader_ins[i];
766 FIXME("Unsupported opcode %lx(%ld) masked %lx version %d\n", code, code, code & D3DSI_OPCODE_MASK, version);
770 inline static BOOL pshader_is_version_token(DWORD token) {
771 return 0xFFFF0000 == (token & 0xFFFF0000);
774 inline static BOOL pshader_is_comment_token(DWORD token) {
775 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
779 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
780 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
782 DWORD reg = param & REGMASK;
783 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
787 sprintf(regstr, "R%lu", reg);
791 strcpy(regstr, "fragment.color.primary");
793 strcpy(regstr, "fragment.color.secondary");
798 sprintf(regstr, "C%lu", reg);
800 sprintf(regstr, "program.env[%lu]", reg);
802 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
803 sprintf(regstr,"T%lu", reg);
806 sprintf(regstr, "%s", rastout_reg_names[reg]);
809 sprintf(regstr, "oD[%lu]", reg);
811 case D3DSPR_TEXCRDOUT:
812 sprintf(regstr, "oT[%lu]", reg);
815 FIXME("Unhandled register name Type(%ld)\n", regtype);
820 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
822 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
823 strcat(write_mask, ".");
824 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
825 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
826 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
827 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
831 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
832 static const char swizzle_reg_chars[] = "rgba";
833 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
834 DWORD swizzle_x = swizzle & 0x03;
835 DWORD swizzle_y = (swizzle >> 2) & 0x03;
836 DWORD swizzle_z = (swizzle >> 4) & 0x03;
837 DWORD swizzle_w = (swizzle >> 6) & 0x03;
839 * swizzle bits fields:
843 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
844 if (swizzle_x == swizzle_y &&
845 swizzle_x == swizzle_z &&
846 swizzle_x == swizzle_w) {
847 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
849 sprintf(swzstring, ".%c%c%c%c",
850 swizzle_reg_chars[swizzle_x],
851 swizzle_reg_chars[swizzle_y],
852 swizzle_reg_chars[swizzle_z],
853 swizzle_reg_chars[swizzle_w]);
858 inline static void addline(unsigned int *lineNum, char *pgm, unsigned int *pgmLength, char *line) {
859 int lineLen = strlen(line);
860 if(lineLen + *pgmLength > PGMSIZE - 1 /* - 1 to allow a NULL at the end */) {
861 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, lineLen + *pgmLength);
864 memcpy(pgm + *pgmLength, line, lineLen);
867 *pgmLength += lineLen;
869 TRACE("GL HW (%u, %u) : %s", *lineNum, *pgmLength, line);
/*
 * Operand names used as the multiplier for each destination shift value.
 * The index is the shift value (0..15); entries without a usable multiplier
 * are filled with "dummy".
 */
static const char* shift_tab[] = {
    "dummy",     /*  0 (none) */
    "coefmul.x", /*  1 (x2)   */
    "coefmul.y", /*  2 (x4)   */
    "coefmul.z", /*  3 (x8)   */
    "coefmul.w", /*  4 (x16)  */
    "dummy",     /*  5 (x32)  */
    "dummy",     /*  6 (x64)  */
    "dummy",     /*  7 (x128) */
    "dummy",     /*  8 (d256) */
    "dummy",     /*  9 (d128) */
    "dummy",     /* 10 (d64)  */
    "dummy",     /* 11 (d32)  */
    "coefdiv.w", /* 12 (d16)  */
    "coefdiv.z", /* 13 (d8)   */
    "coefdiv.y", /* 14 (d4)   */
    "coefdiv.x"  /* 15 (d2)   */
};

/*
 * Writes into `line` a MUL instruction that applies the output modifier to
 * `regstr` in place: "MUL[_SAT] <reg><mask>, <reg>, <multiplier>;".
 *
 *   saturate   - non-zero appends the _SAT suffix to the opcode
 *   write_mask - write-mask text appended to the destination register
 *   shift      - index into shift_tab selecting the multiplier operand
 *   regstr     - register name used as both destination and first source
 *   line       - output buffer; the caller must make it large enough
 *
 * A shift outside the table falls back to entry 0 rather than reading past
 * the end of shift_tab.
 */
inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
    if (shift < 0 || shift >= (int)(sizeof(shift_tab) / sizeof(shift_tab[0]))) {
        shift = 0;
    }
    /* Generate a line that does the output modifier computation */
    sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
}
896 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
897 /* Generate a line that does the input modifier computation and return the input register to use */
898 static char regstr[256];
899 static char tmpline[256];
902 /* Assume a new line will be added */
905 /* Get register name */
906 get_register_name(instr, regstr, constants);
908 TRACE(" Register name %s\n", regstr);
909 switch (instr & D3DSP_SRCMOD_MASK) {
911 strcpy(outregstr, regstr);
915 sprintf(outregstr, "-%s", regstr);
919 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
921 case D3DSPSM_BIASNEG:
922 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
925 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
927 case D3DSPSM_SIGNNEG:
928 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
931 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
934 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
937 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
940 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
941 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
942 strcat(line, "\n"); /* Hack */
943 strcat(line, tmpline);
946 sprintf(line, "RCP T%c, %s;", 'A' + tmpreg, regstr);
947 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
948 strcat(line, "\n"); /* Hack */
949 strcat(line, tmpline);
952 strcpy(outregstr, regstr);
957 /* Substitute the register name */
958 sprintf(outregstr, "T%c", 'A' + tmpreg);
963 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/*
 * Translate a D3D pixel shader token stream into ARB fragment program text
 * and load it into OpenGL.
 *
 * iface     - the pixel shader object. Its constants[] flags are cleared and
 *             then set to 1 for every constant the shader defines itself; the
 *             GL program name is stored in prgId.
 * pFunction - the shader token stream, walked until the D3DPS_END() token.
 *             The token loop is skipped when this is NULL.
 *
 * The program text is built line by line with addline() in a heap buffer of
 * PGMSIZE bytes that is released again before the function returns.
 */
964 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
965 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
966 const DWORD *pToken = pFunction;
967 const SHADER_OPCODE *curOpcode = NULL;
970 unsigned lineNum = 0; /* The line number of the generated program (for logging)*/
971 char *pgmStr = NULL; /* A pointer to the program data generated by this function */
973 DWORD nUseAddressRegister = 0;
974 #if 0 /* TODO: loop register (just another address register ) */
975 BOOL hasLoops = FALSE;
978 BOOL saturate; /* clamp to 0.0 -> 1.0*/
979 int row = 0; /* not sure, something to do with macros? */
981 int version = 0; /* The version of the shader */
983 /* Keep a running length for pgmStr so that we don't have to calculate strlen every time we concatenate */
984 unsigned int pgmLength = 0;
986 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
987 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
988 if (This->device->fixupVertexBufferSize < PGMSIZE) {
989 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
990 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
991 This->fixupVertexBufferSize = PGMSIZE;
992 This->fixupVertexBuffer[0] = 0;
994 pgmStr = This->device->fixupVertexBuffer;
/* NOTE(review): no check of the allocation result is visible before pgmStr is written to - confirm. */
996 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, PGMSIZE); /* 64kb should be enough */
1000 /* TODO: Think about using a first pass to work out what's required for the second pass. */
/* Start with no constant marked as defined by the shader itself */
1001 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
1002 This->constants[i] = 0;
1004 if (NULL != pToken) {
1005 while (D3DPS_END() != *pToken) {
1006 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1008 instructionSize = pToken & SIZEBITS >> 27;
1011 if (pshader_is_version_token(*pToken)) { /** version */
1015 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1016 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1018 TRACE("found version token ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1020 /* Each release of pixel shaders has had different numbers of temp registers */
/* The version also selects the header line that opens the program text. */
/* NOTE(review): only "!!ARBfp1.0" looks like a header defined by ARB_fragment_program - confirm the 2.0/3.0 strings. */
1026 case 14: numTemps=12;
1028 strcpy(tmpLine, "!!ARBfp1.0\n");
1030 case 20: numTemps=12;
1032 strcpy(tmpLine, "!!ARBfp2.0\n");
1033 FIXME("No work done yet to support ps2.0 in hw\n");
1035 case 30: numTemps=32;
1037 strcpy(tmpLine, "!!ARBfp3.0\n");
1038 FIXME("No work done yet to support ps3.0 in hw\n");
1043 strcpy(tmpLine, "!!ARBfp1.0\n");
1044 FIXME("Unrecognized pixel shader version!\n");
1046 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1048 /* TODO: find out how many registers are really needed */
/* Six T and six R temporaries are declared here, independent of numTemps */
1049 for(i = 0; i < 6; i++) {
1050 sprintf(tmpLine, "TEMP T%lu;\n", i);
1051 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1054 for(i = 0; i < 6; i++) {
1055 sprintf(tmpLine, "TEMP R%lu;\n", i);
1056 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Scratch registers: TMP/TMP2 for the texture ops below, TA..TC for gen_input_modifier_line() */
1059 sprintf(tmpLine, "TEMP TMP;\n");
1060 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1061 sprintf(tmpLine, "TEMP TMP2;\n");
1062 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1063 sprintf(tmpLine, "TEMP TA;\n");
1064 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1065 sprintf(tmpLine, "TEMP TB;\n");
1066 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1067 sprintf(tmpLine, "TEMP TC;\n");
1068 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Constants referenced by the modifier helpers (coefdiv, coefmul, one) */
1070 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1071 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1072 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1073 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1074 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1075 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Preload T0..T3 with texture coordinate sets 0..3 */
1077 for(i = 0; i < 4; i++) {
1078 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1079 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1086 if (pshader_is_comment_token(*pToken)) { /** comment */
1087 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1089 FIXME("#%s\n", (char*)pToken);
1090 pToken += comment_len;
1094 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1098 curOpcode = pshader_program_get_opcode(*pToken, version);
1100 if (NULL == curOpcode) {
1101 /* unknown current opcode ... (shouldn't be any!) */
1102 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1103 FIXME("unrecognized opcode: %08lx\n", *pToken);
1106 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1107 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1108 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1109 pToken += curOpcode->num_params;
1111 TRACE("Found opcode %s %s\n", curOpcode->name, curOpcode->glname);
1114 /* Build opcode for GL fragment_program */
1115 switch (curOpcode->opcode) {
1120 /* Address registers must be loaded with the ARL instruction */
1121 if ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) == D3DSPR_ADDR) {
1122 if (((*pToken) & REGMASK) < nUseAddressRegister) {
1123 strcpy(tmpLine, "ARL");
1126 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This, ((*pToken) & REGMASK));
1151 case D3DSIO_TEXKILL:
1152 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1153 strcpy(tmpLine, curOpcode->glname);
/* Constant definition: the four tokens after the register token are read as floats,
 * emitted as a PARAM, and the constant is flagged in This->constants */
1157 DWORD reg = *pToken & REGMASK;
1158 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1159 *((const float *)(pToken + 1)),
1160 *((const float *)(pToken + 2)),
1161 *((const float *)(pToken + 3)),
1162 *((const float *)(pToken + 4)) );
1164 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1166 This->constants[reg] = 1;
/* Texture sample: for versions other than 14 the T register is both coordinate
 * source and destination; for 14 a separate source token follows the destination */
1174 get_write_mask(*pToken, tmp);
1175 if (version != 14) {
1176 DWORD reg = *pToken & REGMASK;
1177 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg, tmp, reg, reg);
1178 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1182 DWORD reg1 = *pToken & REGMASK;
1183 DWORD reg2 = *++pToken & REGMASK;
1184 if (gen_input_modifier_line(*pToken, 0, reg, tmpLine, This->constants)) {
1185 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1187 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1, tmp, reg, reg2);
1188 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1194 case D3DSIO_TEXCOORD:
1197 get_write_mask(*pToken, tmp);
1198 if (version != 14) {
1199 DWORD reg = *pToken & REGMASK;
1200 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1201 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1204 DWORD reg1 = *pToken & REGMASK;
1205 DWORD reg2 = *++pToken & REGMASK;
1206 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1207 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* The TEXM3x2 and TEXM3x3 ops below accumulate one dot product per instruction into
 * a component of TMP; the final op of each group samples with TMP as coordinate */
1213 case D3DSIO_TEXM3x2PAD:
1215 DWORD reg = *pToken & REGMASK;
1217 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1218 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1220 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1221 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1226 case D3DSIO_TEXM3x2TEX:
1228 DWORD reg = *pToken & REGMASK;
1230 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1231 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1233 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1234 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1235 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1236 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1241 case D3DSIO_TEXREG2AR:
1243 DWORD reg1 = *pToken & REGMASK;
1244 DWORD reg2 = *++pToken & REGMASK;
1245 sprintf(tmpLine, "MOV TMP.r, T%lu.a;\n", reg2);
1246 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1247 sprintf(tmpLine, "MOV TMP.g, T%lu.r;\n", reg2);
1248 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1249 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1250 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1255 case D3DSIO_TEXREG2GB:
1257 DWORD reg1 = *pToken & REGMASK;
1258 DWORD reg2 = *++pToken & REGMASK;
1259 sprintf(tmpLine, "MOV TMP.r, T%lu.g;\n", reg2);
1260 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1261 sprintf(tmpLine, "MOV TMP.g, T%lu.b;\n", reg2);
1262 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1263 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1264 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1271 DWORD reg1 = *pToken & REGMASK;
1272 DWORD reg2 = *++pToken & REGMASK;
1274 /* FIXME: Should apply the BUMPMAPENV matrix */
1275 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1276 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1277 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1278 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1283 case D3DSIO_TEXM3x3PAD:
1285 DWORD reg = *pToken & REGMASK;
1287 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1288 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* row selects the TMP component ('x' + row) that receives this dot product */
1290 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1291 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1297 case D3DSIO_TEXM3x3TEX:
1299 DWORD reg = *pToken & REGMASK;
1301 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1302 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1305 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1306 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1308 /* Cubemap textures will be more used than 3D ones. */
1309 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1310 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1315 case D3DSIO_TEXM3x3VSPEC:
1317 DWORD reg = *pToken & REGMASK;
1319 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1320 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1322 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1323 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1325 /* Construct the eye-ray vector from w coordinates */
1326 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1327 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1328 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1329 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1330 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1331 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1333 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1334 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;\n");
1335 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1336 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1337 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1338 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1339 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1341 /* Cubemap textures will be more used than 3D ones. */
1342 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1343 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1349 case D3DSIO_TEXM3x3SPEC:
1351 DWORD reg = *pToken & REGMASK;
1352 DWORD reg3 = *(pToken + 2) & REGMASK;
1354 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants)) {
1355 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1357 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1358 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1360 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
/* NOTE(review): the constant is written as C[n] here while the def handler above declares C<n> - confirm which spelling the program expects. */
1361 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1362 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1364 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1365 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1366 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1367 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1369 /* Cubemap textures will be more used than 3D ones. */
1370 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1371 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1379 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1380 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1382 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1384 pToken += curOpcode->num_params; /* maybe + 1 */
1388 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1389 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1391 case D3DSPDM_SATURATE: saturate = TRUE; break;
1392 #if 0 /* as yet unhandled modifiers */
1393 case D3DSPDM_CENTROID: centroid = TRUE; break;
1394 case D3DSPDM_PP: partialpresision = TRUE; break;
1395 case D3DSPDM_X2: X2 = TRUE; break;
1396 case D3DSPDM_X4: X4 = TRUE; break;
1397 case D3DSPDM_X8: X8 = TRUE; break;
1398 case D3DSPDM_D2: D2 = TRUE; break;
1399 case D3DSPDM_D4: D4 = TRUE; break;
1400 case D3DSPDM_D8: D8 = TRUE; break;
1403 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
1407 /* Generate input and output registers */
1408 if (curOpcode->num_params > 0) {
1410 char operands[4][100];
1414 TRACE("(%p): Opcode has %d params\n", This, curOpcode->num_params);
1416 /* Generate lines that handle input modifier computation */
/* Parameter 0 is the destination; source i uses scratch register index i - 1 */
1417 for (i = 1; i < curOpcode->num_params; ++i) {
1418 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1419 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpLine, This->constants)) {
1420 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1424 /* Handle saturation only when no shift is present in the output modifier */
1425 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
1428 /* Handle output register */
1429 get_register_name(*pToken, tmpOp, This->constants);
1430 strcpy(operands[0], tmpOp);
1431 get_write_mask(*pToken, tmpOp);
1432 strcat(operands[0], tmpOp);
1434 /* This function works because of side effects from gen_input_modifier_line */
1435 /* Handle input registers */
1436 for (i = 1; i < curOpcode->num_params; ++i) {
1437 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1438 strcpy(operands[i], regs[i - 1]);
1439 get_input_register_swizzle(*(pToken + i), swzstring);
1440 strcat(operands[i], swzstring);
1443 switch(curOpcode->opcode) {
/* The two selectable operands are passed to the ARB CMP in swapped order (3 before 2) */
1445 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
/* NOTE(review): unlike every other emitted instruction this ADD line has no trailing newline - confirm that is intended. */
1448 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
1449 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1450 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
/* Generic case: tmpLine already holds the instruction name; append modifier and operand list */
1454 strcat(tmpLine, "_SAT");
1455 strcat(tmpLine, " ");
1456 strcat(tmpLine, operands[0]);
1457 for (i = 1; i < curOpcode->num_params; i++) {
1458 strcat(tmpLine, ", ");
1459 strcat(tmpLine, operands[i]);
1461 strcat(tmpLine,";\n");
1463 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1464 pToken += curOpcode->num_params;
1466 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1467 if (curOpcode->num_params > 0) {
1468 DWORD param = *(pInstr + 1);
1469 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1471 /* Generate a line that handle the output modifier computation */
1473 char write_mask[20];
1474 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1475 get_register_name(param, regstr, This->constants);
1476 get_write_mask(param, write_mask);
1477 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
1478 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1484 /* TODO: What about result.depth? */
/* The shader result is taken from R0 */
1485 strcpy(tmpLine, "MOV result.color, R0;\n");
1486 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1488 strcpy(tmpLine, "END\n");
1489 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1492 /* finally null terminate the pgmStr*/
1493 pgmStr[pgmLength] = 0;
/* NOTE(review): the vertex program extension is tested although a fragment program is created below - confirm. */
1494 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1495 /* Create the hw shader */
1497 /* pgmStr sometimes gets too long for a normal TRACE */
1498 TRACE("Generated program:\n");
1499 if (TRACE_ON(d3d_shader)) {
1500 fprintf(stderr, "%s\n", pgmStr);
1503 /* TODO: change to resource.glObjectHandel or something like that */
1504 GL_EXTCALL(glGenProgramsARB(1, &This->prgId));
1506 TRACE("Creating a hw pixel shader, prg=%d\n", This->prgId);
1507 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->prgId));
1509 TRACE("Created hw pixel shader, prg=%d\n", This->prgId);
1510 /* Create the program and check for errors */
1511 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
/* Only GL_INVALID_OPERATION is treated as a program error; position and message are logged */
1512 if (glGetError() == GL_INVALID_OPERATION) {
1514 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1515 FIXME("HW PixelShader Error at position: %d\n%s\n", errPos, glGetString(GL_PROGRAM_ERROR_STRING_ARB));
1519 #if 1 /* if were using the data buffer of device then we don't need to free it */
1520 HeapFree(GetProcessHeap(), 0, pgmStr);
/*
 * Trace one pixel shader parameter token in assembler-like notation.
 *
 * param - the parameter token
 * input - the callers in this file pass 0 for the first (destination)
 *         parameter of an instruction and 1 for the following (source)
 *         parameters. Presumably this selects between the "operand output"
 *         part (write mask) and the "operand input" part (source modifier
 *         and swizzle) below - confirm against the full source.
 *
 * Output goes to TRACE only; nothing is returned.
 */
1524 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1525 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1526 static const char swizzle_reg_chars[] = "rgba";
1528 /* the unknown mask is for bits not yet accounted for by any other mask... */
1529 #define UNKNOWN_MASK 0xC000
1531 /* for registers above 7 we have to add on bits 11 and 12 to get the correct register */
1532 #define EXTENDED_REG 0x1800
1534 DWORD reg = param & D3DSP_REGNUM_MASK;
/* Register type = the regular type field combined with the two EXTENDED_REG bits moved down by 8 */
1535 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
/* The negating source modifiers and the complement modifier are tested first, ahead of the register name */
1538 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1539 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1540 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1541 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1543 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1547 switch (regtype /* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1555 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1558 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1561 case D3DSPR_RASTOUT:
/* NOTE(review): reg indexes the 5-entry name table with no visible range check - confirm. */
1562 TRACE("%s", rastout_reg_names[reg]);
1564 case D3DSPR_ATTROUT:
1565 TRACE("oD%lu", reg);
1567 case D3DSPR_TEXCRDOUT:
1568 TRACE("oT%lu", reg);
1570 case D3DSPR_CONSTINT:
1571 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1573 case D3DSPR_CONSTBOOL:
1574 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1580 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1587 /** operand output */
1589 * for better debugging traces it's done into opcode dump code
1590 * @see pshader_program_dump_opcode
1591 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1592 DWORD mask = param & D3DSP_DSTMOD_MASK;
1594 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1596 TRACE("_unhandled_modifier(0x%08lx)", mask);
1599 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1600 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1602 TRACE("_x%u", 1 << shift);
/* A write mask is only printed when not all four components are written */
1606 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1608 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1609 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1610 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1611 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1614 /** operand input */
/* Four 2-bit channel selectors, lowest bits first */
1615 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1616 DWORD swizzle_r = swizzle & 0x03;
1617 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1618 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1619 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1621 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1622 DWORD mask = param & D3DSP_SRCMOD_MASK;
1623 /*TRACE("_modifier(0x%08lx) ", mask);*/
/* The negated variants print the same suffix as their plain counterparts */
1625 case D3DSPSM_NONE: break;
1626 case D3DSPSM_NEG: break;
1627 case D3DSPSM_BIAS: TRACE("_bias"); break;
1628 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1629 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1630 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1631 case D3DSPSM_COMP: break;
1632 case D3DSPSM_X2: TRACE("_x2"); break;
1633 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1634 case D3DSPSM_DZ: TRACE("_dz"); break;
1635 case D3DSPSM_DW: TRACE("_dw"); break;
1637 TRACE("_unknown(0x%08lx)", mask);
1642 * swizzle bits fields:
/* Nothing is printed for the identity swizzle; a replicated channel is printed once */
1645 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1646 if (swizzle_r == swizzle_g &&
1647 swizzle_r == swizzle_b &&
1648 swizzle_r == swizzle_a) {
1649 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1652 swizzle_reg_chars[swizzle_r],
1653 swizzle_reg_chars[swizzle_g],
1654 swizzle_reg_chars[swizzle_b],
1655 swizzle_reg_chars[swizzle_a]);
/*
 * Attach a shader function (token stream) to the pixel shader object.
 *
 * iface     - the pixel shader object
 * pFunction - the shader token stream, or NULL
 *
 * The stream is walked up to the D3DPS_END() token, tracing each instruction
 * in readable form. The resulting size is stored in This->functionLength.
 * If a function was given and the hardware mode setting matches, the ARB
 * program is generated, and the tokens are copied into This->function
 * (NULL when no function was given).
 *
 * The code itself notes that proper failure return values are still a TODO.
 */
1661 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1662 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1663 const DWORD* pToken = pFunction;
1664 const SHADER_OPCODE *curOpcode = NULL;
1668 TRACE("(%p) : Parsing programme\n", This);
1670 if (NULL != pToken) {
1671 while (D3DPS_END() != *pToken) {
1672 if (pshader_is_version_token(*pToken)) { /** version */
/* Same encoding as in the program generator: major * 10 + minor */
1673 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1674 TRACE("ps_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1679 if (pshader_is_comment_token(*pToken)) { /** comment */
1680 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1682 TRACE("//%s\n", (char*)pToken);
1683 pToken += comment_len;
/* len counts DWORD tokens: the comment body plus the comment token itself */
1684 len += comment_len + 1;
1688 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1690 curOpcode = pshader_program_get_opcode(*pToken, version);
1693 if (NULL == curOpcode) {
1695 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1696 while (*pToken & 0x80000000) {
1698 /* unknown current opcode ... */
1699 TRACE("unrecognized opcode: %08lx", *pToken);
1706 if (curOpcode->opcode == D3DSIO_DCL) {
/* Declaration: the low 16 bits give the usage, bits 16..19 the usage index */
1708 switch(*pToken & 0xFFFF) {
1709 case D3DDECLUSAGE_POSITION:
1710 TRACE("%s%ld ", "position",(*pToken & 0xF0000) >> 16);
1712 case D3DDECLUSAGE_BLENDINDICES:
1713 TRACE("%s ", "blend");
1715 case D3DDECLUSAGE_BLENDWEIGHT:
1716 TRACE("%s ", "weight");
1718 case D3DDECLUSAGE_NORMAL:
1719 TRACE("%s%ld ", "normal",(*pToken & 0xF0000) >> 16);
1721 case D3DDECLUSAGE_PSIZE:
1722 TRACE("%s ", "psize");
1724 case D3DDECLUSAGE_COLOR:
/* Usage index 0 is printed as "color", higher indices as "specular" numbered from 0 */
1725 if((*pToken & 0xF0000) >> 16 == 0) {
1726 TRACE("%s ", "color");
1728 TRACE("%s%ld ", "specular", ((*pToken & 0xF0000) >> 16) - 1);
1731 case D3DDECLUSAGE_TEXCOORD:
1732 TRACE("%s%ld ", "texture", (*pToken & 0xF0000) >> 16);
1734 case D3DDECLUSAGE_TANGENT:
1735 TRACE("%s ", "tangent");
1737 case D3DDECLUSAGE_BINORMAL:
1738 TRACE("%s ", "binormal");
1740 case D3DDECLUSAGE_TESSFACTOR:
1741 TRACE("%s ", "tessfactor");
1743 case D3DDECLUSAGE_POSITIONT:
1744 TRACE("%s%ld ", "positionT",(*pToken & 0xF0000) >> 16);
1746 case D3DDECLUSAGE_FOG:
1747 TRACE("%s ", "fog");
1749 case D3DDECLUSAGE_DEPTH:
1750 TRACE("%s ", "depth");
1752 case D3DDECLUSAGE_SAMPLE:
1753 TRACE("%s ", "sample");
1756 FIXME("Unrecognised dcl %08lx", *pToken & 0xFFFF);
1760 pshader_program_dump_ps_param(*pToken, 0);
1764 if (curOpcode->opcode == D3DSIO_DEF) {
/* Constant definition: register number from the low byte, then four tokens read as floats */
1765 TRACE("def c%lu = ", *pToken & 0xFF);
1768 TRACE("%f ,", *(float *)pToken);
1771 TRACE("%f ,", *(float *)pToken);
1774 TRACE("%f ,", *(float *)pToken);
1777 TRACE("%f", *(float *)pToken);
1781 TRACE("%s ", curOpcode->name);
/* First parameter is dumped as destination (0), the remaining ones as sources (1) */
1782 if (curOpcode->num_params > 0) {
1783 pshader_program_dump_ps_param(*pToken, 0);
1786 for (i = 1; i < curOpcode->num_params; ++i) {
1788 pshader_program_dump_ps_param(*pToken, 1);
/* Size in bytes; the + 1 presumably accounts for the end token - confirm */
1797 This->functionLength = (len + 1) * sizeof(DWORD);
1799 This->functionLength = 1; /* no Function defined use fixed function pixel processing */
1802 /* Generate HW shader if needed */
/* NOTE(review): the vertex shader mode setting (vs_mode / VS_HW) gates pixel shader generation - confirm this is intended. */
1803 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1804 TRACE("(%p) : Generating hardware program\n", This);
1806 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1810 TRACE("(%p) : Copying the function\n", This);
1811 /* copy the function ... because it will certainly be released by application */
1812 if (NULL != pFunction) {
/* NOTE(review): the allocation result is passed to memcpy without a visible NULL check - confirm. */
1813 This->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->functionLength);
1814 memcpy((void *)This->function, pFunction, This->functionLength);
1816 This->function = NULL;
1819 /* TODO: Some proper return values for failures */
1820 TRACE("(%p) : Returning D3D_OK\n", This);
1824 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1826 /*** IUnknown methods ***/
1827 IWineD3DPixelShaderImpl_QueryInterface,
1828 IWineD3DPixelShaderImpl_AddRef,
1829 IWineD3DPixelShaderImpl_Release,
1830 /*** IWineD3DPixelShader methods ***/
1831 IWineD3DPixelShaderImpl_GetParent,
1832 IWineD3DPixelShaderImpl_GetDevice,
1833 IWineD3DPixelShaderImpl_GetFunction,
1834 /* not part of d3d */
1835 IWineD3DPixelShaderImpl_SetFunction