2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2005 Oliver Stieber
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 #include "wined3d_private.h"
/* Declares d3d_shader as the default debug channel for this file. */
30 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
/* Expands to the gl_info member reached from a local `This` through its wineD3DDevice
 * and that device's wineD3D object; a variable named `This` must be in scope. */
32 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
/* Verbose tracing helpers for the software VM routines below; this branch is disabled by the `#if 0`. */
34 #if 0 /* Must not be 1 in cvs version */
/* PSTRACE forwards an already-parenthesised argument list to TRACE, hence PSTRACE((...)) at call sites. */
35 # define PSTRACE(A) TRACE A
/* Traces the x, y, z, w members of `name`, prefixed by the spelled-out expression. */
36 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
/* NOTE(review): the #else / #endif lines and the empty PSTRACE definition are not visible in this
 * extract; this empty TRACE_VSVECTOR is presumably the disabled-tracing variant. */
39 # define TRACE_VSVECTOR(name)
/* Sentinel pointer value used in the opcode table's GL-name column for instructions marked as requiring GLSL. */
42 #define GLNAME_REQUIRE_GLSL ((const char *)1)
43 /* *******************************************
44 IWineD3DPixelShader IUnknown parts follow
45 ******************************************* */
/*
 * IUnknown::QueryInterface for the pixel shader object.
 *
 * Traces the request, then compares riid against IID_IUnknown, IID_IWineD3DBase,
 * IID_IWineD3DBaseShader and IID_IWineD3DPixelShader. On a match a reference is
 * taken on iface.
 *
 * NOTE(review): the statements that store into *ppobj and the return statements
 * are not visible in this extract - confirm against the full file.
 */
46 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
48 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
49 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
/* All four interfaces are served by the same object. */
50 if (IsEqualGUID(riid, &IID_IUnknown)
51 || IsEqualGUID(riid, &IID_IWineD3DBase)
52 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
53 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
54 IUnknown_AddRef(iface);
62 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
63 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
64 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
65 return InterlockedIncrement(&This->ref);
/*
 * IUnknown::Release for the pixel shader object.
 *
 * Traces the current count, atomically decrements This->ref into `ref`, and
 * contains a HeapFree of the object itself on the process heap.
 *
 * NOTE(review): the declaration of `ref`, the condition guarding HeapFree and
 * the return statement are not visible in this extract - presumably the object
 * is freed only when the count reaches zero; confirm against the full file.
 */
68 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
69 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
71 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
72 ref = InterlockedDecrement(&This->ref);
/* Releases the memory backing the shader object itself. */
74 HeapFree(GetProcessHeap(), 0, This);
79 /* TODO: At the moment the function parser is single pass; it achieves this
80 by passing constants to a couple of functions where they are then modified.
81 At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
82 when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
85 /* *******************************************
86 IWineD3DPixelShader IWineD3DPixelShader parts follow
87 ******************************************* */
/*
 * Hands the shader's parent object back to the caller.
 *
 * Stores This->parent in *parent and takes a reference on it through
 * IUnknown_AddRef, then traces the returned pointer.
 *
 * NOTE(review): *parent is passed to IUnknown_AddRef without a NULL check, and
 * the return statement is not visible in this extract.
 */
89 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
90 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
92 *parent = This->parent;
93 IUnknown_AddRef(*parent);
94 TRACE("(%p) : returning %p\n", This, *parent);
/*
 * Returns the device that owns this shader.
 *
 * Takes a reference on This->wineD3DDevice, stores it in *pDevice and traces
 * the returned pointer.
 *
 * NOTE(review): the return statement is not visible in this extract.
 */
98 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
99 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
/* The reference is taken before the pointer is handed out. */
100 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
101 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
102 TRACE("(%p) returning %p\n", This, *pDevice);
/*
 * Copies the shader's function data (This->baseShader.function) to the caller.
 *
 * pSizeOfData carries the caller's buffer size in and the required size out:
 * when the buffer is smaller than baseShader.functionLength the required size
 * is written back and WINED3DERR_MOREDATA is returned. Otherwise, if no
 * function is stored a NULL is written through pData, else functionLength
 * bytes are copied into pData.
 *
 * NOTE(review): several lines of this function are not visible in this extract
 * (the condition guarding the first size store, the body of the
 * functionLength == 0 branch, and the final return) - confirm against the
 * full file.
 */
107 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
108 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
109 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
/* Reports the required size to the caller. */
112 *pSizeOfData = This->baseShader.functionLength;
/* Caller's buffer is too small: report the size needed and fail. */
115 if (*pSizeOfData < This->baseShader.functionLength) {
116 *pSizeOfData = This->baseShader.functionLength;
117 return WINED3DERR_MOREDATA;
119 if (NULL == This->baseShader.function) { /* no function defined */
120 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
/* NOTE(review): this stores a pointer-sized NULL into the caller's buffer, whatever size was passed in. */
121 (*(DWORD **) pData) = NULL;
123 if (This->baseShader.functionLength == 0) {
126 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
127 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
132 /*******************************
133 * pshader functions software VM
136 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
137 d->x = s0->x + s1->x;
138 d->y = s0->y + s1->y;
139 d->z = s0->z + s1->z;
140 d->w = s0->w + s1->w;
141 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
142 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
145 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
146 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
147 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
148 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
151 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
152 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
153 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
154 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/*
 * dst - software VM handler for the "dst" instruction.
 *
 * The visible code sets d->y to the product of the two sources' y components
 * and traces operands and result.
 *
 * NOTE(review): the assignments to d->x, d->z and d->w are not visible in this
 * extract - confirm against the full file.
 */
157 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
159 d->y = s0->y * s1->y;
162 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
163 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/*
 * expp - partial-precision base-2 exponential of s0->w.
 *
 * Visible results: d->x = 2^floor(w) and d->y = w - floor(w). 2^w is then
 * computed into tmp.f and the low eight bits of tmp.d are cleared.
 *
 * NOTE(review): the declaration of `tmp` (presumably a float/integer union) and
 * the stores to d->z and d->w are not visible in this extract. The trace text
 * says "exp" although this is the expp handler.
 */
166 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
172 tmp.f = floorf(s0->w);
173 d->x = powf(2.0f, tmp.f);
174 d->y = s0->w - tmp.f;
175 tmp.f = powf(2.0f, s0->w);
/* Masks off the low 8 bits of the value's bit pattern to reduce its precision. */
176 tmp.d &= 0xFFFFFF00U;
179 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
180 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
183 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
184 float tmp_f = fabsf(s0->w);
185 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
186 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
187 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
190 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
191 d->x = s0->x * s1->x + s2->x;
192 d->y = s0->y * s1->y + s2->y;
193 d->z = s0->z * s1->z + s2->z;
194 d->w = s0->w * s1->w + s2->w;
195 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
196 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
199 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
200 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
201 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
202 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
203 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
204 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
205 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
208 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
209 d->x = (s0->x < s1->x) ? s0->x : s1->x;
210 d->y = (s0->y < s1->y) ? s0->y : s1->y;
211 d->z = (s0->z < s1->z) ? s0->z : s1->z;
212 d->w = (s0->w < s1->w) ? s0->w : s1->w;
213 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
214 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/*
 * mov - software VM handler for the "mov" instruction; traces s0 and d.
 *
 * NOTE(review): the statements that write d are not visible in this extract -
 * presumably a component-wise copy of s0; confirm against the full file.
 */
217 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
222 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
223 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
226 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
227 d->x = s0->x * s1->x;
228 d->y = s0->y * s1->y;
229 d->z = s0->z * s1->z;
230 d->w = s0->w * s1->w;
231 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
232 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/*
 * nop - no operation; the only effect is the optional PSTRACE line.
 */
void pshader_nop(void)
{
    PSTRACE(("executing nop\n"));
}
240 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
241 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
242 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
243 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
246 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
247 float tmp_f = fabsf(s0->w);
248 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
249 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
250 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
253 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
254 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
255 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
256 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
257 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
258 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
259 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
262 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
263 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
264 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
265 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
266 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
267 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
268 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
271 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
272 d->x = s0->x - s1->x;
273 d->y = s0->y - s1->y;
274 d->z = s0->z - s1->z;
275 d->w = s0->w - s1->w;
276 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
277 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
281 * Version 1.1 specific
284 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
285 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
286 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
287 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
290 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
291 float tmp_f = fabsf(s0->w);
292 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
293 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
294 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/*
 * frc - fractional part: the visible code sets d->x and d->y to the source
 * component minus its floor, then traces operand and result.
 *
 * NOTE(review): the stores to d->z and d->w are not visible in this extract -
 * confirm against the full file.
 */
297 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
298 d->x = s0->x - floorf(s0->x);
299 d->y = s0->y - floorf(s0->y);
302 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
303 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Matrix operand types for the m*x* handlers below, declared as [rows][columns] of FLOAT.
 * Note the digits in each name are the reverse of its dimensions except for the square ones:
 * D3DMATRIX43 is 4 rows of 3, D3DMATRIX34 is 3 rows of 4, D3DMATRIX23 is 2 rows of 3. */
306 typedef FLOAT D3DMATRIX44[4][4];
307 typedef FLOAT D3DMATRIX43[4][3];
308 typedef FLOAT D3DMATRIX34[3][4];
309 typedef FLOAT D3DMATRIX33[3][3];
310 typedef FLOAT D3DMATRIX23[2][3];
312 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
314 * Buggy CODE: here only if cast not work for copy/paste
315 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
316 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
317 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
318 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
319 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
320 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
321 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
323 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
324 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
325 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
326 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
327 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
328 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
329 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
330 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
/*
 * m4x3 - multiplies the four-component vector s0 by a matrix of three rows of
 * four columns; d->x, d->y and d->z are the dot products of rows 0..2 with s0.
 *
 * NOTE(review): the statement that sets d->w (traced on the last line) is not
 * visible in this extract - confirm against the full file.
 */
333 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
334 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
335 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
336 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
338 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
339 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
340 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
341 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
344 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
345 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
346 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
347 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
348 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
349 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
350 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
351 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
352 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
/*
 * m3x3 - multiplies the x, y, z components of s0 by a 3x3 matrix; d->x, d->y
 * and d->z are the dot products of rows 0..2 with (s0.x, s0.y, s0.z).
 *
 * NOTE(review): the statement that sets d->w (traced on the last line) is not
 * visible in this extract - confirm against the full file.
 */
355 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
356 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
357 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
358 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
360 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
361 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
362 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
363 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
/*
 * m3x2 - multiplies the x, y, z components of s0 by a matrix of two rows of
 * three columns; d->x and d->y are the dot products of rows 0 and 1 with
 * (s0.x, s0.y, s0.z).
 *
 * NOTE(review): any statements that set d->z and d->w are not visible in this
 * extract - confirm against the full file.
 */
366 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
368 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
369 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
375 * Version 2.0 specific
377 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
378 d->x = s0->x * (s1->x - s2->x) + s2->x;
379 d->y = s0->y * (s1->y - s2->y) + s2->y;
380 d->z = s0->z * (s1->z - s2->z) + s2->z;
381 d->w = s0->w * (s1->w - s2->w) + s2->w;
384 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
385 d->x = s0->y * s1->z - s0->z * s1->y;
386 d->y = s0->z * s1->x - s0->x * s1->z;
387 d->z = s0->x * s1->y - s0->y * s1->x;
388 d->w = 0.9f; /* w is undefined, so set it to something safeish */
390 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
391 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/*
 * abs - software VM handler for the "abs" instruction; traces s0 and d.
 *
 * NOTE(review): the statements that write d are not visible in this extract -
 * presumably a component-wise absolute value of s0; confirm against the full file.
 */
394 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
399 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
400 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/*
 * Software VM handlers for the texture addressing, def, cnd/cmp and bem
 * instructions. Each one is referenced from the
 * IWineD3DPixelShaderImpl_shader_ins opcode table under the matching mnemonic.
 *
 * NOTE(review): only the signature line of each handler is visible in this
 * extract; their bodies are not shown, so nothing can be said here about what
 * they do - confirm against the full file.
 */
404 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
408 void pshader_texkill(WINED3DSHADERVECTOR* d) {
412 void pshader_tex(WINED3DSHADERVECTOR* d) {
415 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
419 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
423 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
427 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
431 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
435 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
439 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
443 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
447 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
451 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
455 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
459 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
463 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
467 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
468 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
472 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
476 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
480 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
484 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
488 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
492 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
496 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
500 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
/*
 * Software VM handlers for the flow-control instructions (call, loop, rep, if,
 * break, ...) and for the remaining later-model instructions (dcl, pow, nrm,
 * sincos, defb, defi, dp2add, dsx, dsy, texldd, setp, texldl). Each one is
 * referenced from the IWineD3DPixelShaderImpl_shader_ins opcode table under
 * the matching mnemonic.
 *
 * NOTE(review): only the signature line of each handler is visible in this
 * extract; their bodies are not shown - confirm against the full file.
 */
504 void pshader_call(WINED3DSHADERVECTOR* d) {
508 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
512 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
516 void pshader_ret(void) {
520 void pshader_endloop(void) {
524 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
528 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
532 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
536 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
540 void pshader_rep(WINED3DSHADERVECTOR* d) {
544 void pshader_endrep(void) {
548 void pshader_if(WINED3DSHADERVECTOR* d) {
552 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
556 void pshader_else(void) {
560 void pshader_label(WINED3DSHADERVECTOR* d) {
564 void pshader_endif(void) {
568 void pshader_break(void) {
572 void pshader_breakc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
576 void pshader_breakp(WINED3DSHADERVECTOR* d) {
580 void pshader_defb(WINED3DSHADERVECTOR* d) {
584 void pshader_defi(WINED3DSHADERVECTOR* d) {
588 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
592 void pshader_dsx(WINED3DSHADERVECTOR* d) {
596 void pshader_dsy(WINED3DSHADERVECTOR* d) {
600 void pshader_texldd(WINED3DSHADERVECTOR* d) {
604 void pshader_setp(WINED3DSHADERVECTOR* d) {
608 void pshader_texldl(WINED3DSHADERVECTOR* d) {
613 * log, exp, frc, m*x* seems to be macros ins ... to see
615 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = {
616 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
617 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
618 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
619 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
620 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
621 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
622 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
623 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
624 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
625 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
626 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
627 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
628 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
629 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
630 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
631 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
632 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
633 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
634 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
635 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
636 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
637 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
638 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
639 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
640 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
643 /** FIXME: use direct access so add the others opcodes as stubs */
644 /* DCL is a specil operation */
645 {D3DSIO_DCL, "dcl", NULL, 2, pshader_dcl, 0, 0},
646 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
647 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
648 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
651 MUL vec.xyz, vec, tmp;
652 but I think this is better because it accounts for w properly.
658 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
659 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
661 /* Flow control - requires GLSL or software shaders */
662 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 1, pshader_rep, 0, 0},
663 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
664 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 1, pshader_if, 0, 0},
665 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
666 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 0, pshader_else, 0, 0},
667 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 0, pshader_endif, 0, 0},
668 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 0, pshader_break, 0, 0},
669 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
670 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 1, pshader_breakp, 0, 0},
671 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
672 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
673 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
674 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
675 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
676 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
678 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
679 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
681 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
682 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
683 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
684 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
685 {D3DSIO_TEX, "texld", "undefined", 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
686 {D3DSIO_TEX, "texld", "undefined", 3, pshader_texld, D3DPS_VERSION(2,0), -1},
687 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
688 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
689 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
690 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
691 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
692 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
693 {D3DSIO_TEXM3x3PAD, "texm3x3pad", "undefined", 2, pshader_texm3x3pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
694 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
695 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
696 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
697 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
698 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
699 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
700 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
701 /* def is a special operation */
702 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
703 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
704 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
705 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
706 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
707 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
708 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
709 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
710 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
711 /* TODO: dp2add can be made out of multiple instuctions */
712 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
713 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
714 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
715 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
716 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
717 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
718 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
719 {0, NULL, NULL, 0, NULL, 0, 0}
722 inline static BOOL pshader_is_version_token(DWORD token) {
723 return 0xFFFF0000 == (token & 0xFFFF0000);
726 inline static BOOL pshader_is_comment_token(DWORD token) {
727 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
/*
 * Writes the textual name of the register encoded in `param` into regstr.
 *
 * The register number is param & D3DSP_REGNUM_MASK and the register type comes
 * from shader_get_regtype(). Names produced by the visible code: "R<n>",
 * "fragment.color.primary" / "fragment.color.secondary", "C<n>" or
 * "program.env[<n>]", "T<n>", an entry of rastout_reg_names, "oD[<n>]" and
 * "oT[<n>]"; an unhandled type is reported through FIXME.
 *
 * regstr is written with sprintf/strcpy and no length limit, so the caller
 * must supply a buffer large enough for the longest name.
 *
 * NOTE(review): the switch statement, most case labels, the conditions that
 * choose between the alternative names, and every use of `constants` are not
 * visible in this extract - confirm against the full file.
 */
731 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
732 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
734 DWORD reg = param & D3DSP_REGNUM_MASK;
735 DWORD regtype = shader_get_regtype(param);
739 sprintf(regstr, "R%lu", reg);
743 strcpy(regstr, "fragment.color.primary");
745 strcpy(regstr, "fragment.color.secondary");
750 sprintf(regstr, "C%lu", reg);
752 sprintf(regstr, "program.env[%lu]", reg);
754 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
755 sprintf(regstr,"T%lu", reg);
/* NOTE(review): no visible range check on reg before indexing the five-entry rastout_reg_names. */
758 sprintf(regstr, "%s", rastout_reg_names[reg]);
761 sprintf(regstr, "oD[%lu]", reg);
763 case D3DSPR_TEXCRDOUT:
764 sprintf(regstr, "oT[%lu]", reg);
767 FIXME("Unhandled register name Type(%ld)\n", regtype);
/*
 * Appends the write-mask suffix for a destination register token to write_mask.
 *
 * Nothing is appended when all D3DSP_WRITEMASK_ALL bits are set. Otherwise a
 * "." is appended, followed by "r", "g", "b", "a" for each of
 * D3DSP_WRITEMASK_0..3 that is set, in that order.
 *
 * NOTE(review): every write goes through strcat, so write_mask must already
 * hold a terminated string here; the statement that initialises it is not
 * visible in this extract. The buffer needs room for up to five characters
 * plus the terminator beyond whatever it already holds.
 */
772 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
774 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
775 strcat(write_mask, ".");
776 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
777 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
778 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
779 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
/*
 * Writes the swizzle suffix for a source register token into swzstring.
 *
 * The swizzle field (instr & D3DSP_SWIZZLE_MASK, shifted down by
 * D3DSP_SWIZZLE_SHIFT) holds four two-bit selectors, x in the lowest bits,
 * then y, z, w. Each selector indexes "rgba". When the field differs from the
 * identity swizzle the output is ".c" if all four selectors are equal, or
 * ".cccc" with one letter per component otherwise.
 *
 * NOTE(review): what swzstring holds for the identity swizzle depends on an
 * initialising statement that is not visible in this extract, and the `else`
 * separating the two sprintf calls is not visible either.
 */
783 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
784 static const char swizzle_reg_chars[] = "rgba";
785 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
786 DWORD swizzle_x = swizzle & 0x03;
787 DWORD swizzle_y = (swizzle >> 2) & 0x03;
788 DWORD swizzle_z = (swizzle >> 4) & 0x03;
789 DWORD swizzle_w = (swizzle >> 6) & 0x03;
791 * swizzle bits fields:
795 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
/* Replicate swizzle: one letter is enough. */
796 if (swizzle_x == swizzle_y &&
797 swizzle_x == swizzle_z &&
798 swizzle_x == swizzle_w) {
799 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
801 sprintf(swzstring, ".%c%c%c%c",
802 swizzle_reg_chars[swizzle_x],
803 swizzle_reg_chars[swizzle_y],
804 swizzle_reg_chars[swizzle_z],
805 swizzle_reg_chars[swizzle_w]);
/*
 * Maps a shift value (0..15) to the name of the multiplier operand used by
 * gen_output_modifier_line(). Entries 1..4 name components of "coefmul"
 * (x2..x16) and entries 12..15 name components of "coefdiv" (d16..d2); every
 * other value, including 0, maps to "dummy".
 */
810 static const char* shift_tab[] = {
811 "dummy", /* 0 (none) */
812 "coefmul.x", /* 1 (x2) */
813 "coefmul.y", /* 2 (x4) */
814 "coefmul.z", /* 3 (x8) */
815 "coefmul.w", /* 4 (x16) */
816 "dummy", /* 5 (x32) */
817 "dummy", /* 6 (x64) */
818 "dummy", /* 7 (x128) */
819 "dummy", /* 8 (d256) */
820 "dummy", /* 9 (d128) */
821 "dummy", /* 10 (d64) */
822 "dummy", /* 11 (d32) */
823 "coefdiv.w", /* 12 (d16) */
824 "coefdiv.z", /* 13 (d8) */
825 "coefdiv.y", /* 14 (d4) */
826 "coefdiv.x" /* 15 (d2) */
829 inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
830 /* Generate a line that does the output modifier computation */
831 sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
/*
 * Resolves the source-modifier of an input register token.
 *
 * The register name is obtained through get_register_name(). Depending on
 * instr & D3DSP_SRCMOD_MASK the function either copies the name (optionally
 * negated) straight into outregstr, or formats into `line` one or two
 * instructions that compute the modified value into the temporary named
 * "T" followed by the letter 'A' + tmpreg, and then names that temporary in
 * outregstr.
 *
 * regstr and tmpline are function-local static buffers, so the function is
 * not reentrant.
 *
 * NOTE(review): most case labels, the break statements, the bookkeeping that
 * records whether a line was generated, and the return statement are not
 * visible in this extract - confirm against the full file.
 */
834 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
835 /* Generate a line that does the input modifier computation and return the input register to use */
836 static char regstr[256];
837 static char tmpline[256];
840 /* Assume a new line will be added */
843 /* Get register name */
844 get_register_name(instr, regstr, constants);
846 TRACE(" Register name %s\n", regstr);
847 switch (instr & D3DSP_SRCMOD_MASK) {
/* No extra instruction: the register is used as is. */
849 strcpy(outregstr, regstr);
/* No extra instruction: the register is used negated. */
853 sprintf(outregstr, "-%s", regstr);
/* reg - coefdiv.x */
857 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
859 case D3DSPSM_BIASNEG:
860 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
/* reg * coefmul.x - one.x */
863 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
865 case D3DSPSM_SIGNNEG:
866 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
/* one.x - reg */
869 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
/* reg + reg */
872 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
/* -(reg + reg) */
875 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
/* Two instructions joined by a newline: reciprocal of reg.z, then reg scaled by it. */
878 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
879 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
880 strcat(line, "\n"); /* Hack */
881 strcat(line, tmpline);
/* Same as above, dividing by reg.w instead. */
884 sprintf(line, "RCP T%c, %s.w;", 'A' + tmpreg, regstr);
885 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
886 strcat(line, "\n"); /* Hack */
887 strcat(line, tmpline);
890 strcpy(outregstr, regstr);
895 /* Substitute the register name */
896 sprintf(outregstr, "T%c", 'A' + tmpreg);
902 inline static void pshader_program_get_registers_used(
903 IWineD3DPixelShaderImpl *This,
904 CONST DWORD* pToken, DWORD* tempsUsed, DWORD* texUsed) {
912 while (D3DVS_END() != *pToken) {
913 CONST SHADER_OPCODE* curOpcode;
916 if (pshader_is_version_token(*pToken)) {
921 } else if (pshader_is_comment_token(*pToken)) {
922 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
924 pToken += comment_len;
929 curOpcode = shader_get_opcode((IWineD3DBaseShader*) This, *pToken);
932 /* Skip declarations (for now) */
933 if (D3DSIO_DCL == curOpcode->opcode) {
934 pToken += curOpcode->num_params;
937 /* Skip definitions (for now) */
938 } else if (D3DSIO_DEF == curOpcode->opcode) {
939 pToken += curOpcode->num_params;
942 /* Set texture registers, and temporary registers */
946 for (i = 0; i < curOpcode->num_params; ++i) {
947 DWORD regtype = shader_get_regtype(*pToken);
948 DWORD reg = (*pToken) & D3DSP_REGNUM_MASK;
949 if (D3DSPR_TEXTURE == regtype)
950 *texUsed |= (1 << reg);
951 if (D3DSPR_TEMP == regtype)
952 *tempsUsed |= (1 << reg);
959 void pshader_set_version(
960 IWineD3DPixelShaderImpl *This,
963 DWORD major = (version >> 8) & 0x0F;
964 DWORD minor = version & 0x0F;
966 This->baseShader.hex_version = version;
967 This->baseShader.version = major * 10 + minor;
968 TRACE("ps_%lu_%lu\n", major, minor);
970 This->baseShader.limits.address = 0;
972 switch (This->baseShader.version) {
976 case 13: This->baseShader.limits.temporary = 2;
977 This->baseShader.limits.constant_float = 8;
978 This->baseShader.limits.constant_int = 0;
979 This->baseShader.limits.constant_bool = 0;
980 This->baseShader.limits.texture = 4;
983 case 14: This->baseShader.limits.temporary = 6;
984 This->baseShader.limits.constant_float = 8;
985 This->baseShader.limits.constant_int = 0;
986 This->baseShader.limits.constant_bool = 0;
987 This->baseShader.limits.texture = 6;
990 /* FIXME: temporaries must match D3DPSHADERCAPS2_0.NumTemps */
991 case 20: This->baseShader.limits.temporary = 32;
992 This->baseShader.limits.constant_float = 32;
993 This->baseShader.limits.constant_int = 16;
994 This->baseShader.limits.constant_bool = 16;
995 This->baseShader.limits.texture = 8;
998 case 30: This->baseShader.limits.temporary = 32;
999 This->baseShader.limits.constant_float = 224;
1000 This->baseShader.limits.constant_int = 16;
1001 This->baseShader.limits.constant_bool = 16;
1002 This->baseShader.limits.texture = 0;
1005 default: This->baseShader.limits.temporary = 32;
1006 This->baseShader.limits.constant_float = 8;
1007 This->baseShader.limits.constant_int = 0;
1008 This->baseShader.limits.constant_bool = 0;
1009 This->baseShader.limits.texture = 8;
1010 FIXME("Unrecognized pixel shader version %lu!\n", version);
1014 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/* Translates the D3D pixel shader token stream at pFunction into the text of
 * an ARB fragment program ("!!ARBfp1.0") and hands it to OpenGL.
 *
 * The text is assembled with shader_addline() in a heap-allocated
 * SHADER_BUFFER. pshader_program_get_registers_used() is run first so that
 * only the T<n> / R<n> registers that are actually referenced get declared;
 * the token stream is then walked again and each D3D opcode is turned into
 * one or more ARB instructions. The GL program id ends up in
 * This->baseShader.prgId; it is set to -1 when glGetError() reports
 * GL_INVALID_OPERATION after the program string was submitted.
 */
1015 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1016 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1017 const DWORD *pToken = pFunction;
1018 const SHADER_OPCODE *curOpcode = NULL;
1019 const DWORD *pInstr;
1022 #if 0 /* TODO: loop register (just another address register ) */
1023 BOOL hasLoops = FALSE;
1025 SHADER_BUFFER buffer;
1027 BOOL saturate; /* clamp to 0.0 -> 1.0*/
/* row picks the TMP component written by TEXM3x3PAD ('x' + row, see below) */
1028 int row = 0; /* not sure, something to do with macros? */
1030 int version = This->baseShader.version;
1032 /* Keep bitmaps of used temporary and texture registers */
1033 DWORD tempsUsed, texUsed;
1035 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
1036 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
1037 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
1038 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
1039 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
1040 This->fixupVertexBufferSize = SHADER_PGMSIZE;
1041 This->fixupVertexBuffer[0] = 0;
1043 buffer.buffer = This->device->fixupVertexBuffer;
/* NOTE(review): no NULL check of this allocation is visible before the buffer
 * is written to - confirm. */
1045 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
1050 /* FIXME: if jumps are used, use GLSL, else use ARB_fragment_program */
1051 shader_addline(&buffer, "!!ARBfp1.0\n");
1053 /* TODO: Think about using a first pass to work out what's required for the second pass. */
/* Reset the per-constant flags; a flag is set to 1 below when a def
 * instruction emits the matching "PARAM C<n>" line. */
1054 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
1055 This->constants[i] = 0;
1057 /* First pass: figure out which temporary and texture registers are used */
1058 pshader_program_get_registers_used(This, pToken, &tempsUsed, &texUsed);
1059 TRACE("Texture registers used: %#lx, Temp registers used %#lx\n", texUsed, tempsUsed);
1061 /* TODO: check register usage against GL/Directx limits, and fail if they're exceeded */
1063 /* Pre-declare registers */
1064 for(i = 0; i < This->baseShader.limits.texture; i++) {
1065 if (texUsed & (1 << i))
1066 shader_addline(&buffer,"TEMP T%lu;\n", i);
1069 for(i = 0; i < This->baseShader.limits.temporary; i++) {
1070 if (tempsUsed & (1 << i))
1071 shader_addline(&buffer, "TEMP R%lu;\n", i);
1074 /* Necessary for internal operations */
/* TA/TB/TC are the scratch registers named by gen_input_modifier_line();
 * coefdiv, coefmul and one are the constants its generated lines refer to. */
1075 shader_addline(&buffer, "TEMP TMP;\n");
1076 shader_addline(&buffer, "TEMP TMP2;\n");
1077 shader_addline(&buffer, "TEMP TA;\n");
1078 shader_addline(&buffer, "TEMP TB;\n");
1079 shader_addline(&buffer, "TEMP TC;\n");
1080 shader_addline(&buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1081 shader_addline(&buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1082 shader_addline(&buffer, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1084 /* Texture coordinate registers must be pre-loaded */
1085 for (i = 0; i < This->baseShader.limits.texture; i++) {
1086 if (texUsed & (1 << i))
1087 shader_addline(&buffer, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1090 /* Second pass, process opcodes */
1091 if (NULL != pToken) {
1092 while (D3DPS_END() != *pToken) {
1093 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1095 instructionSize = pToken & SIZEBITS >> 27;
1099 /* Skip version token */
1100 if (pshader_is_version_token(*pToken)) {
1105 /* Skip comment tokens */
1106 if (pshader_is_comment_token(*pToken)) {
1107 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1109 TRACE("#%s\n", (char*)pToken);
1110 pToken += comment_len;
1114 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1118 curOpcode = shader_get_opcode((IWineD3DBaseShader*) This, *pToken);
1120 if (NULL == curOpcode) {
1121 /* unknown current opcode ... (shouldn't be any!) */
1122 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1123 FIXME("unrecognized opcode: %08lx\n", *pToken);
1126 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1127 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1128 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1129 pToken += curOpcode->num_params;
1131 } else if (D3DSIO_DEF == curOpcode->opcode) {
1133 /* Handle definitions here, they don't fit well with the
1134 * other instructions below [for now ] */
1136 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1138 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1139 curOpcode->name, curOpcode->glname, curOpcode->num_params);
/* The four tokens after the register token are read as the float components
 * of the constant. */
1141 shader_addline(&buffer,
1142 "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1143 *((const float *)(pToken + 1)),
1144 *((const float *)(pToken + 2)),
1145 *((const float *)(pToken + 3)),
1146 *((const float *)(pToken + 4)) );
/* NOTE(review): reg comes straight from the token; no range check against
 * WINED3D_PSHADER_MAX_CONSTANTS is visible here - confirm. */
1148 This->constants[reg] = 1;
1154 /* Common processing: [inst] [dst]* [src]* */
1156 char output_rname[256];
1157 char output_wmask[20];
1159 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1160 curOpcode->name, curOpcode->glname, curOpcode->num_params);
1164 /* Build opcode for GL vertex_program */
1165 switch (curOpcode->opcode) {
1191 case D3DSIO_TEXKILL:
1192 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1193 strcpy(tmpLine, curOpcode->glname);
1199 char reg_coord_swz[20] = "";
1200 DWORD reg_dest_code;
1201 DWORD reg_sampler_code;
1203 /* All versions have a destination register */
1204 reg_dest_code = *pToken & D3DSP_REGNUM_MASK;
1205 get_register_name(*pToken++, reg_dest, This->constants);
1207 /* 1.0-1.3: Use destination register as coordinate source. No modifiers.
1208 1.4: Use provided coordinate source register. _dw, _dz, swizzle allowed.
1209 2.0+: Use provided coordinate source register. No modifiers.
1210 3.0+: Use provided coordinate source register. Swizzle allowed */
1212 strcpy(reg_coord, reg_dest);
1214 else if (version == 14) {
/* NOTE(review): tmpLine is handed to shader_addline() as the format argument
 * here and in the other "shader_addline(&buffer, tmpLine)" calls; this is only
 * safe as long as the generated text never contains a '%' - confirm. */
1215 if (gen_input_modifier_line(*pToken, 0, reg_coord, tmpLine, This->constants))
1216 shader_addline(&buffer, tmpLine);
1217 get_input_register_swizzle(*pToken, reg_coord_swz);
1220 else if (version > 14 && version < 30) {
1221 get_register_name(*pToken, reg_coord, This->constants);
1224 else if (version >= 30) {
1225 get_input_register_swizzle(*pToken, reg_coord_swz);
1226 get_register_name(*pToken, reg_coord, This->constants);
1230 /* 1.0-1.4: Use destination register number as texture code.
1231 2.0+: Use provided sampler number as texure code. */
1233 reg_sampler_code = reg_dest_code;
1236 reg_sampler_code = *pToken & D3DSP_REGNUM_MASK;
/* The sample is always emitted with a 2D texture target */
1240 shader_addline(&buffer, "TEX %s, %s%s, texture[%lu], 2D;\n",
1241 reg_dest, reg_coord, reg_coord_swz, reg_sampler_code);
1245 case D3DSIO_TEXCOORD:
1248 get_write_mask(*pToken, tmp);
1249 if (version != 14) {
1250 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1251 shader_addline(&buffer, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1254 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1255 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1256 shader_addline(&buffer, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1262 case D3DSIO_TEXM3x2PAD:
1264 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1266 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1267 shader_addline(&buffer, tmpLine);
/* First row of the 3x2 transform goes to TMP.x ... */
1268 shader_addline(&buffer, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1273 case D3DSIO_TEXM3x2TEX:
1275 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1277 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1278 shader_addline(&buffer, tmpLine);
/* ... second row goes to TMP.y, then TMP is used as the 2D lookup coordinate */
1279 shader_addline(&buffer, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1280 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1285 case D3DSIO_TEXREG2AR:
1287 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1288 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
/* Alpha and red of the source register become the lookup coordinate */
1289 shader_addline(&buffer, "MOV TMP.r, T%lu.a;\n", reg2);
1290 shader_addline(&buffer, "MOV TMP.g, T%lu.r;\n", reg2);
1291 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1296 case D3DSIO_TEXREG2GB:
1298 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1299 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
/* Green and blue of the source register become the lookup coordinate */
1300 shader_addline(&buffer, "MOV TMP.r, T%lu.g;\n", reg2);
1301 shader_addline(&buffer, "MOV TMP.g, T%lu.b;\n", reg2);
1302 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1309 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1310 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1312 /* FIXME: Should apply the BUMPMAPENV matrix */
1313 shader_addline(&buffer, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1314 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1319 case D3DSIO_TEXM3x3PAD:
1321 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1323 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1324 shader_addline(&buffer, tmpLine);
/* 'x' + row selects which TMP component receives this row's dot product */
1325 shader_addline(&buffer, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1331 case D3DSIO_TEXM3x3TEX:
1333 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1335 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1336 shader_addline(&buffer, tmpLine);
1337 shader_addline(&buffer, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1339 /* Cubemap textures will be more used than 3D ones. */
1340 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1345 case D3DSIO_TEXM3x3VSPEC:
1347 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1349 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1350 shader_addline(&buffer, tmpLine);
1351 shader_addline(&buffer, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1353 /* Construct the eye-ray vector from w coordinates */
/* presumably tcw[] holds the register numbers of the two preceding
 * TEXM3x3PAD instructions - its declaration and assignment are not shown
 * here; verify. */
1354 shader_addline(&buffer, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1355 shader_addline(&buffer, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1356 shader_addline(&buffer, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1358 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1359 shader_addline(&buffer, "DP3 TMP.w, TMP, TMP2;\n");
1360 shader_addline(&buffer, "MUL TMP, TMP.w, TMP;\n");
1361 shader_addline(&buffer, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1363 /* Cubemap textures will be more used than 3D ones. */
1364 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1370 case D3DSIO_TEXM3x3SPEC:
1372 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1373 DWORD reg3 = *(pToken + 2) & D3DSP_REGNUM_MASK;
1375 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants))
1376 shader_addline(&buffer, tmpLine);
1377 shader_addline(&buffer, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1379 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
/* Here the eye vector is the constant named by the third parameter */
1380 shader_addline(&buffer, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1381 shader_addline(&buffer, "MUL TMP, TMP.w, TMP;\n");
1382 shader_addline(&buffer, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1384 /* Cubemap textures will be more used than 3D ones. */
1385 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1393 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1394 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1396 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1398 pToken += curOpcode->num_params;
1402 /* Process modifiers */
1403 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1404 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1406 case D3DSPDM_SATURATE: saturate = TRUE; break;
1407 #if 0 /* as yet unhandled modifiers */
1408 case D3DSPDM_CENTROID: centroid = TRUE; break;
1409 case D3DSPDM_PP: partialpresision = TRUE; break;
1412 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
/* Destination shift: used as the index into shift_tab[] by
 * gen_output_modifier_line() below */
1415 shift = (*pToken & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1417 /* Generate input and output registers */
1418 if (curOpcode->num_params > 0) {
/* operands[0] is the destination, operands[1..] the sources */
1420 char operands[4][100];
1424 /* Generate lines that handle input modifier computation */
1425 for (i = 1; i < curOpcode->num_params; ++i) {
1426 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1427 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpOp, This->constants)) {
1428 shader_addline(&buffer, tmpOp);
1432 /* Handle output register */
1433 get_register_name(*pToken, output_rname, This->constants);
1434 strcpy(operands[0], output_rname);
1435 get_write_mask(*pToken, output_wmask);
1436 strcat(operands[0], output_wmask);
1438 /* This function works because of side effects from gen_input_modifier_line */
1439 /* Handle input registers */
1440 for (i = 1; i < curOpcode->num_params; ++i) {
1441 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1442 strcpy(operands[i], regs[i - 1]);
1443 get_input_register_swizzle(*(pToken + i), swzstring);
1444 strcat(operands[i], swzstring);
1447 switch(curOpcode->opcode) {
/* Note the operand order: sources 2 and 3 are swapped for the ARB CMP */
1449 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""),
1450 operands[0], operands[1], operands[3], operands[2]);
/* This variant compares against 0.5 by first computing TMP = 0.5 - src1 */
1453 shader_addline(&buffer, "ADD TMP, -%s, coefdiv.x;\n", operands[1]);
1454 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""),
1455 operands[0], operands[2], operands[3]);
/* With a shift, saturation is applied on the extra MUL line instead */
1458 if (saturate && (shift == 0))
1459 strcat(tmpLine, "_SAT");
1460 strcat(tmpLine, " ");
1461 strcat(tmpLine, operands[0]);
1462 for (i = 1; i < curOpcode->num_params; i++) {
1463 strcat(tmpLine, ", ");
1464 strcat(tmpLine, operands[i]);
1466 strcat(tmpLine,";\n");
1468 shader_addline(&buffer, tmpLine);
1470 /* A shift requires another line. */
1472 gen_output_modifier_line(saturate, output_wmask, shift, output_rname, tmpLine);
1473 shader_addline(&buffer, tmpLine);
1475 pToken += curOpcode->num_params;
1479 /* TODO: What about result.depth? */
1480 shader_addline(&buffer, "MOV result.color, R0;\n");
1481 shader_addline(&buffer, "END\n");
1484 /* finally null terminate the buffer */
1485 buffer.buffer[buffer.bsize] = 0;
/* NOTE(review): the vertex program extension is tested here although a
 * GL_FRAGMENT_PROGRAM_ARB program is created below - confirm whether
 * ARB_FRAGMENT_PROGRAM was intended. */
1486 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1487 /* Create the hw shader */
1489 /* The program string sometimes gets too long for a normal TRACE */
1490 TRACE("Generated program:\n");
1491 if (TRACE_ON(d3d_shader)) {
1492 fprintf(stderr, "%s\n", buffer.buffer);
1495 /* TODO: change to resource.glObjectHandle or something like that */
1496 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
1498 TRACE("Creating a hw pixel shader, prg=%d\n", This->baseShader.prgId);
1499 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->baseShader.prgId));
1501 TRACE("Created hw pixel shader, prg=%d\n", This->baseShader.prgId);
1502 /* Create the program and check for errors */
1503 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
1504 buffer.bsize, buffer.buffer));
1506 if (glGetError() == GL_INVALID_OPERATION) {
1508 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1509 FIXME("HW PixelShader Error at position %d: %s\n",
1510 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
/* Mark the shader as having no usable GL program */
1511 This->baseShader.prgId = -1;
1514 #if 1 /* if were using the data buffer of device then we don't need to free it */
1515 HeapFree(GetProcessHeap(), 0, buffer.buffer);
1519 inline static void pshader_program_dump_ins_modifiers(const DWORD output) {
1521 DWORD shift = (output & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1522 DWORD mmask = output & D3DSP_DSTMOD_MASK;
1526 case 13: TRACE("_d8"); break;
1527 case 14: TRACE("_d4"); break;
1528 case 15: TRACE("_d2"); break;
1529 case 1: TRACE("_x2"); break;
1530 case 2: TRACE("_x4"); break;
1531 case 3: TRACE("_x8"); break;
1532 default: TRACE("_unhandled_shift(%ld)", shift); break;
1536 case D3DSPDM_NONE: break;
1537 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1538 case D3DSPDM_PARTIALPRECISION: TRACE("_pp"); break;
1539 case D3DSPDM_MSAMPCENTROID: TRACE("_centroid"); break;
1540 default: TRACE("_unhandled_modifier(%#lx)", mmask); break;
/* Trace a textual form of one pixel shader parameter token.
 *
 * param - the raw parameter token; register number and register type are
 *         extracted from it
 * input - callers in this file pass 0 for a destination parameter and 1 for a
 *         source parameter; presumably this selects between the write-mask
 *         output and the modifier/swizzle output below (the branch lines are
 *         not shown here - verify)
 */
1544 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1545 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1546 static const char swizzle_reg_chars[] = "rgba";
1548 DWORD reg = param & D3DSP_REGNUM_MASK;
1549 DWORD regtype = shader_get_regtype(param);
/* Negating source modifiers; the matching suffix (_bias, _bx2, _x2) is
 * printed further down */
1552 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1553 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1554 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1555 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1557 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1569 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1572 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1575 case D3DSPR_RASTOUT:
/* NOTE(review): rastout_reg_names has five entries but reg is only limited by
 * D3DSP_REGNUM_MASK; no range check is visible before this lookup - confirm. */
1576 TRACE("%s", rastout_reg_names[reg]);
1578 case D3DSPR_ATTROUT:
1579 TRACE("oD%lu", reg);
1581 case D3DSPR_TEXCRDOUT:
1582 TRACE("oT%lu", reg);
1584 case D3DSPR_CONSTINT:
1585 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1587 case D3DSPR_CONSTBOOL:
1588 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1594 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1596 case D3DSPR_SAMPLER:
1600 TRACE("unhandled_rtype(%lx)", regtype);
1605 /* operand output (for modifiers and shift, see dump_ins_modifiers) */
/* The write mask is only printed when not all four components are written */
1607 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1609 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1610 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1611 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1612 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1615 /** operand input */
/* The swizzle holds four 2-bit component selectors, lowest bits first */
1616 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1617 DWORD swizzle_r = swizzle & 0x03;
1618 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1619 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1620 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1622 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1623 DWORD mask = param & D3DSP_SRCMOD_MASK;
1624 /*TRACE("_modifier(0x%08lx) ", mask);*/
1626 case D3DSPSM_NONE: break;
1627 case D3DSPSM_NEG: break;
1628 case D3DSPSM_BIAS: TRACE("_bias"); break;
1629 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1630 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1631 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1632 case D3DSPSM_COMP: break;
1633 case D3DSPSM_X2: TRACE("_x2"); break;
1634 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1635 case D3DSPSM_DZ: TRACE("_dz"); break;
1636 case D3DSPSM_DW: TRACE("_dw"); break;
1638 TRACE("_unknown(0x%08lx)", mask);
1643 * swizzle bits fields:
/* Nothing is printed for the identity swizzle; a swizzle that replicates one
 * component is printed as a single letter */
1646 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1647 if (swizzle_r == swizzle_g &&
1648 swizzle_r == swizzle_b &&
1649 swizzle_r == swizzle_a) {
1650 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1653 swizzle_reg_chars[swizzle_r],
1654 swizzle_reg_chars[swizzle_g],
1655 swizzle_reg_chars[swizzle_b],
1656 swizzle_reg_chars[swizzle_a]);
/* Trace the usage part of a dcl instruction.
 *
 * decl  - the declaration token (texture type, or usage and usage index)
 * param - the register token the declaration applies to; only its register
 *         type is looked at
 *
 * For a sampler register the texture type is printed; the remaining lines
 * decode the usage and usage index from decl and print their names.
 */
1662 inline static void pshader_program_dump_decl_usage(
1663 IWineD3DPixelShaderImpl *This, DWORD decl, DWORD param) {
1665 DWORD regtype = shader_get_regtype(param);
1668 if (regtype == D3DSPR_SAMPLER) {
1669 DWORD ttype = decl & D3DSP_TEXTURETYPE_MASK;
1672 case D3DSTT_2D: TRACE("2d "); break;
1673 case D3DSTT_CUBE: TRACE("cube "); break;
1674 case D3DSTT_VOLUME: TRACE("volume "); break;
1675 default: TRACE("unknown_ttype(%08lx) ", ttype);
1680 DWORD usage = decl & D3DSP_DCL_USAGE_MASK;
1681 DWORD idx = (decl & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
1684 case D3DDECLUSAGE_POSITION:
1685 TRACE("%s%ld ", "position", idx);
1687 case D3DDECLUSAGE_BLENDINDICES:
1688 TRACE("%s ", "blend");
1690 case D3DDECLUSAGE_BLENDWEIGHT:
1691 TRACE("%s ", "weight");
1693 case D3DDECLUSAGE_NORMAL:
1694 TRACE("%s%ld ", "normal", idx);
1696 case D3DDECLUSAGE_PSIZE:
1697 TRACE("%s ", "psize");
1699 case D3DDECLUSAGE_COLOR:
/* Two spellings for the colour usage: "color", or "specular" numbered from
 * idx - 1 (the condition choosing between them is not shown here) */
1701 TRACE("%s ", "color");
1703 TRACE("%s%ld ", "specular", (idx - 1));
1706 case D3DDECLUSAGE_TEXCOORD:
1707 TRACE("%s%ld ", "texture", idx);
1709 case D3DDECLUSAGE_TANGENT:
1710 TRACE("%s ", "tangent");
1712 case D3DDECLUSAGE_BINORMAL:
1713 TRACE("%s ", "binormal");
1715 case D3DDECLUSAGE_TESSFACTOR:
1716 TRACE("%s ", "tessfactor");
1718 case D3DDECLUSAGE_POSITIONT:
1719 TRACE("%s%ld ", "positionT", idx);
1721 case D3DDECLUSAGE_FOG:
1722 TRACE("%s ", "fog");
1724 case D3DDECLUSAGE_DEPTH:
1725 TRACE("%s ", "depth");
1727 case D3DDECLUSAGE_SAMPLE:
1728 TRACE("%s ", "sample");
1731 FIXME("Unrecognised dcl %08lx", usage);
/* Assign the shader byte code at pFunction to this pixel shader.
 *
 * The token stream is walked once to trace a disassembly and to work out its
 * length, which is stored in baseShader.functionLength (in bytes). The version
 * token found on the way is handed to pshader_set_version(). Afterwards the
 * hardware program is generated when the settings ask for it, and a private
 * copy of the byte code is kept in baseShader.function because the caller's
 * buffer is expected to go away.
 */
1736 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1737 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1738 const DWORD* pToken = pFunction;
1739 const SHADER_OPCODE *curOpcode = NULL;
1742 TRACE("(%p) : Parsing programme\n", This);
1744 if (NULL != pToken) {
1745 while (D3DPS_END() != *pToken) {
1746 if (pshader_is_version_token(*pToken)) { /** version */
1747 pshader_set_version(This, *pToken);
1752 if (pshader_is_comment_token(*pToken)) { /** comment */
1753 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1755 TRACE("//%s\n", (char*)pToken);
1756 pToken += comment_len;
/* The comment body plus the comment token itself */
1757 len += comment_len + 1;
/* An instruction was reached before any version token set the version */
1760 if (!This->baseShader.version) {
1761 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1763 curOpcode = shader_get_opcode((IWineD3DBaseShader*) This, *pToken);
1766 if (NULL == curOpcode) {
1768 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1769 while (*pToken & 0x80000000) {
1771 /* unknown current opcode ... */
1772 TRACE("unrecognized opcode: %08lx", *pToken);
/* dcl: a declaration token followed by the register token it applies to */
1779 if (curOpcode->opcode == D3DSIO_DCL) {
1780 pshader_program_dump_decl_usage(This, *pToken, *(pToken + 1));
1783 pshader_program_dump_ps_param(*pToken, 0);
/* def: the tokens following the register token are traced as floats */
1787 if (curOpcode->opcode == D3DSIO_DEF) {
1788 TRACE("def c%lu = ", *pToken & 0xFF);
1791 TRACE("%f ,", *(float *)pToken);
1794 TRACE("%f ,", *(float *)pToken);
1797 TRACE("%f ,", *(float *)pToken);
1800 TRACE("%f", *(float *)pToken);
/* Any other instruction: name, destination modifiers, destination, sources */
1804 TRACE("%s", curOpcode->name);
1805 if (curOpcode->num_params > 0) {
1806 pshader_program_dump_ins_modifiers(*pToken);
1808 pshader_program_dump_ps_param(*pToken, 0);
1811 for (i = 1; i < curOpcode->num_params; ++i) {
1813 pshader_program_dump_ps_param(*pToken, 1);
/* len counts tokens; one more is added before converting to bytes
 * (presumably for the end token - verify) */
1822 This->baseShader.functionLength = (len + 1) * sizeof(DWORD);
1824 This->baseShader.functionLength = 1; /* no Function defined use fixed function vertex processing */
1827 /* Generate HW shader if needed */
/* NOTE(review): the vertex shader setting (vs_mode / VS_HW) decides whether
 * the pixel shader program is generated - confirm this is intended. */
1828 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1829 TRACE("(%p) : Generating hardware program\n", This);
1831 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1835 TRACE("(%p) : Copying the function\n", This);
1836 /* copy the function ... because it will certainly be released by application */
1837 if (NULL != pFunction) {
/* NOTE(review): the allocation is passed to memcpy without a visible NULL
 * check - confirm. */
1838 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1839 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1841 This->baseShader.function = NULL;
1844 /* TODO: Some proper return values for failures */
1845 TRACE("(%p) : Returning WINED3D_OK\n", This);
/* Function table of the pixel shader object. The entries are listed interface
 * by interface, as marked by the group comments, and point at the
 * IWineD3DPixelShaderImpl_* implementations. */
1849 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1851 /*** IUnknown methods ***/
1852 IWineD3DPixelShaderImpl_QueryInterface,
1853 IWineD3DPixelShaderImpl_AddRef,
1854 IWineD3DPixelShaderImpl_Release,
1855 /*** IWineD3DBase methods ***/
1856 IWineD3DPixelShaderImpl_GetParent,
1857 /*** IWineD3DBaseShader methods ***/
1858 IWineD3DPixelShaderImpl_SetFunction,
1859 /*** IWineD3DPixelShader methods ***/
1860 IWineD3DPixelShaderImpl_GetDevice,
1861 IWineD3DPixelShaderImpl_GetFunction