2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2005 Oliver Stieber
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 #include "wined3d_private.h"
30 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
32 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
34 #if 0 /* Must not be 1 in cvs version */
35 # define PSTRACE(A) TRACE A
36 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
39 # define TRACE_VSVECTOR(name)
42 #define GLNAME_REQUIRE_GLSL ((const char *)1)
43 /* *******************************************
44 IWineD3DPixelShader IUnknown parts follow
45 ******************************************* */
/* IUnknown::QueryInterface for the pixel shader object.
 * The visible code traces the call and, when riid equals IID_IUnknown,
 * IID_IWineD3DBase, IID_IWineD3DBaseShader or IID_IWineD3DPixelShader,
 * takes a reference on iface.
 * NOTE(review): the lines that store *ppobj and produce the HRESULT are not
 * visible in this excerpt - confirm against the full file. */
46 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
48 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
49 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
50 if (IsEqualGUID(riid, &IID_IUnknown)
51 || IsEqualGUID(riid, &IID_IWineD3DBase)
52 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
53 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
54 IUnknown_AddRef(iface);
/* IUnknown::AddRef: bumps This->ref with InterlockedIncrement and returns the
 * value that call yields. The TRACE prints the count as it was before the
 * increment. */
62 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
63 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
64 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
65 return InterlockedIncrement(&This->ref);
/* IUnknown::Release: decrements This->ref with InterlockedDecrement and keeps
 * the result in ref; the object itself is released with HeapFree on the
 * process heap.
 * NOTE(review): the declaration of ref, the condition guarding HeapFree
 * (presumably ref == 0) and the return statement are not visible in this
 * excerpt - confirm against the full file. */
68 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
69 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
71 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
72 ref = InterlockedDecrement(&This->ref);
74 HeapFree(GetProcessHeap(), 0, This);
79 /* TODO: At the moment the function parser is single pass; it achieves this
80 by passing constants to a couple of functions where they are then modified.
81 At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
82 when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
85 /* *******************************************
86 IWineD3DPixelShader IWineD3DPixelShader parts follow
87 ******************************************* */
/* Returns the stored parent object through *parent and takes a reference on
 * it for the caller.
 * NOTE(review): neither parent nor This->parent is checked for NULL in the
 * visible lines; the return statement is not visible in this excerpt. */
89 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
90 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
92 *parent = This->parent;
93 IUnknown_AddRef(*parent);
94 TRACE("(%p) : returning %p\n", This, *parent);
/* Returns the owning device through *pDevice. A reference is taken on
 * This->wineD3DDevice before the pointer is handed out.
 * NOTE(review): the return statement is not visible in this excerpt. */
98 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
99 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
100 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
101 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
102 TRACE("(%p) returning %p\n", This, *pDevice);
/* Hands the shader's stored function data back to the caller.
 *
 * Visible behaviour:
 *  - *pSizeOfData is set to baseShader.functionLength (the condition guarding
 *    this first assignment is not visible in this excerpt);
 *  - if the caller's *pSizeOfData is smaller than functionLength, the required
 *    size is written back and WINED3DERR_MOREDATA is returned;
 *  - if no function is stored, NULL is written through pData (treated as a
 *    DWORD **);
 *  - otherwise functionLength bytes are copied from baseShader.function to
 *    pData with memcpy.
 *
 * NOTE(review): the body of the `functionLength == 0` test, the else keyword
 * and the final return are not visible here - confirm against the full file.
 * The entry trace uses FIXME rather than TRACE. */
107 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
108 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
109 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
112 *pSizeOfData = This->baseShader.functionLength;
115 if (*pSizeOfData < This->baseShader.functionLength) {
116 *pSizeOfData = This->baseShader.functionLength;
117 return WINED3DERR_MOREDATA;
119 if (NULL == This->baseShader.function) { /* no function defined */
120 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
121 (*(DWORD **) pData) = NULL;
123 if (This->baseShader.functionLength == 0) {
126 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
127 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
132 /*******************************
133 * pshader functions software VM
/* add: d = s0 + s1, computed independently for x, y, z and w.
 * The PSTRACE line only produces output when the PSTRACE macro is enabled. */
136 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
137 d->x = s0->x + s1->x;
138 d->y = s0->y + s1->y;
139 d->z = s0->z + s1->z;
140 d->w = s0->w + s1->w;
141 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
142 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* dp3: dot product of the x, y, z components of s0 and s1; the scalar result
 * is written to all four components of d. The w inputs are ignored. */
145 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
146 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
147 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
148 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* dp4: four-component dot product of s0 and s1; the scalar result is written
 * to all four components of d. */
151 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
152 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
153 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
154 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* dst: the visible line sets d->y to s0->y * s1->y.
 * NOTE(review): the assignments to d->x, d->z and d->w are not visible in this
 * excerpt - confirm against the full file. */
157 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
159 d->y = s0->y * s1->y;
162 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
163 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* expp: base-2 exponential of s0->w, split into parts.
 *   d->x = 2 ^ floor(s0->w)
 *   d->y = s0->w - floor(s0->w)   (fractional part)
 * 2 ^ s0->w is then computed into tmp.f and the low 8 bits of its integer view
 * (tmp.d) are cleared.
 * NOTE(review): the declaration of tmp and the stores to d->z / d->w are not
 * visible in this excerpt. The trace text says "exp", not "expp". */
166 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
172 tmp.f = floorf(s0->w);
173 d->x = powf(2.0f, tmp.f);
174 d->y = s0->w - tmp.f;
175 tmp.f = powf(2.0f, s0->w);
176 tmp.d &= 0xFFFFFF00U;
179 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
180 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* logp: log base 2 of |s0->w|, written to all four components of d.
 * A zero input yields -HUGE_VAL instead of calling logf. */
183 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
184 float tmp_f = fabsf(s0->w);
185 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
186 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
187 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* mad: d = s0 * s1 + s2, computed independently for x, y, z and w. */
190 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
191 d->x = s0->x * s1->x + s2->x;
192 d->y = s0->y * s1->y + s2->y;
193 d->z = s0->z * s1->z + s2->z;
194 d->w = s0->w * s1->w + s2->w;
195 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
196 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
/* max: per-component maximum of s0 and s1. s0 is chosen when the two compare
 * equal; s1 is chosen whenever the >= test is false. */
199 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
200 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
201 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
202 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
203 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
204 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
205 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* min: per-component minimum of s0 and s1. s0 is chosen only when it is
 * strictly smaller; s1 is chosen whenever the < test is false. */
208 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
209 d->x = (s0->x < s1->x) ? s0->x : s1->x;
210 d->y = (s0->y < s1->y) ? s0->y : s1->y;
211 d->z = (s0->z < s1->z) ? s0->z : s1->z;
212 d->w = (s0->w < s1->w) ? s0->w : s1->w;
213 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
214 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* mov: only the trace statement is visible in this excerpt.
 * NOTE(review): the component assignments from s0 to d are not visible here -
 * confirm against the full file. */
217 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
222 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
223 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* mul: d = s0 * s1, computed independently for x, y, z and w. */
226 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
227 d->x = s0->x * s1->x;
228 d->y = s0->y * s1->y;
229 d->z = s0->z * s1->z;
230 d->w = s0->w * s1->w;
231 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
232 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: performs no computation; it only emits a trace line when PSTRACE is
 * enabled. */
235 void pshader_nop(void) {
236 /* NOPPPP ahhh too easy ;) */
237 PSTRACE(("executing nop\n"));
/* rcp: reciprocal of s0->w, written to all four components of d.
 * An input of exactly zero yields HUGE_VAL instead of dividing. */
240 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
241 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
242 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
243 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* rsq: reciprocal square root of |s0->w|, written to all four components of d.
 * A zero input yields HUGE_VAL; an input magnitude of exactly 1.0f yields 1.0f
 * without calling sqrtf. */
246 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
247 float tmp_f = fabsf(s0->w);
248 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
249 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
250 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* sge: per-component "set on greater or equal" - each component of d becomes
 * 1.0f when s0 >= s1 for that component and 0.0f otherwise. */
253 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
254 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
255 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
256 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
257 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
258 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
259 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* slt: per-component "set on less than" - each component of d becomes 1.0f
 * when s0 < s1 for that component and 0.0f otherwise. */
262 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
263 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
264 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
265 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
266 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
267 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
268 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* sub: d = s0 - s1, computed independently for x, y, z and w. */
271 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
272 d->x = s0->x - s1->x;
273 d->y = s0->y - s1->y;
274 d->z = s0->z - s1->z;
275 d->w = s0->w - s1->w;
276 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
277 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
281 * Version 1.1 specific
/* exp: 2 raised to the power s0->w, written to all four components of d. */
284 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
285 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
286 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
287 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* log: log base 2 of |s0->w|, written to all four components of d.
 * A zero input yields -HUGE_VAL instead of calling logf. The computation is
 * the same expression as in pshader_logp. */
290 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
291 float tmp_f = fabsf(s0->w);
292 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
293 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
294 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* frc: fractional part (value minus its floor) for the x and y components.
 * NOTE(review): the handling of d->z and d->w is not visible in this excerpt -
 * confirm against the full file. */
297 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
298 d->x = s0->x - floorf(s0->x);
299 d->y = s0->y - floorf(s0->y);
302 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
303 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Fixed-size FLOAT matrix types used by the m*x* handlers below. The two
 * digits in each name are the array dimensions in declaration order, e.g.
 * D3DMATRIX43 is FLOAT[4][3]. Note that pshader_m4x3 takes a D3DMATRIX34 and
 * pshader_m3x4 takes a D3DMATRIX43. */
306 typedef FLOAT D3DMATRIX44[4][4];
307 typedef FLOAT D3DMATRIX43[4][3];
308 typedef FLOAT D3DMATRIX34[3][4];
309 typedef FLOAT D3DMATRIX33[3][3];
310 typedef FLOAT D3DMATRIX23[2][3];
/* m4x4: multiplies the 4x4 matrix mat by the vector s0. Each component of d is
 * the four-component dot product of one row of mat (mat[0]..mat[3]) with s0.
 * The lines mentioning mat1..mat4 belong to an older variant labelled
 * "Buggy CODE" in the source; NOTE(review): its comment delimiters are not
 * visible in this excerpt - presumably it is commented out; confirm. */
312 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
314 * Buggy CODE: here only if cast not work for copy/paste
315 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
316 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
317 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
318 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
319 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
320 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
321 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
323 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
324 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
325 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
326 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
327 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
328 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
329 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
330 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
/* m4x3: d->x, d->y and d->z are the four-component dot products of rows 0..2
 * of mat (a FLOAT[3][4]) with s0.
 * NOTE(review): the assignment to d->w is not visible in this excerpt. */
333 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
334 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
335 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
336 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
338 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
339 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
340 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
341 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
/* m3x4: each of the four components of d is the three-component dot product
 * of one row of mat (a FLOAT[4][3]) with the x, y, z components of s0.
 * s0->w is used only in the trace output. */
344 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
345 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
346 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
347 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
348 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
349 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
350 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
351 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
352 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
/* m3x3: d->x, d->y and d->z are the three-component dot products of rows 0..2
 * of mat with the x, y, z components of s0.
 * NOTE(review): the assignment to d->w is not visible in this excerpt. */
355 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
356 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
357 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
358 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
360 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
361 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
362 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
363 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
/* m3x2: d->x and d->y are the three-component dot products of rows 0 and 1 of
 * mat with the x, y, z components of s0.
 * NOTE(review): the lines before and after these two assignments (including
 * any handling of d->z / d->w) are not visible in this excerpt. */
366 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
368 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
369 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
375 * Version 2.0 specific
/* lrp: per-component linear interpolation, d = s0 * (s1 - s2) + s2.
 * With s0 == 0 the result is s2; with s0 == 1 it is s1. No trace statement is
 * visible for this handler. */
377 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
378 d->x = s0->x * (s1->x - s2->x) + s2->x;
379 d->y = s0->y * (s1->y - s2->y) + s2->y;
380 d->z = s0->z * (s1->z - s2->z) + s2->z;
381 d->w = s0->w * (s1->w - s2->w) + s2->w;
/* crs: cross product of the x, y, z components of s0 and s1, stored in
 * d->x, d->y, d->z. d->w is set to the constant 0.9f. */
384 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
385 d->x = s0->y * s1->z - s0->z * s1->y;
386 d->y = s0->z * s1->x - s0->x * s1->z;
387 d->z = s0->x * s1->y - s0->y * s1->x;
388 d->w = 0.9f; /* w is undefined, so set it to something safeish */
390 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
391 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* abs: only the trace statement is visible in this excerpt.
 * NOTE(review): the component assignments are not visible here - confirm
 * against the full file. */
394 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
399 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
400 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
404 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
408 void pshader_texkill(WINED3DSHADERVECTOR* d) {
412 void pshader_tex(WINED3DSHADERVECTOR* d) {
415 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
419 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
423 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
427 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
431 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
435 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
439 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
443 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
447 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
451 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
455 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
459 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
463 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
467 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
468 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
472 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
476 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
480 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
484 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
488 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
492 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
496 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
500 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
504 void pshader_call(WINED3DSHADERVECTOR* d) {
508 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
512 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
516 void pshader_ret(void) {
520 void pshader_endloop(void) {
524 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
528 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
532 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
536 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
540 void pshader_rep(WINED3DSHADERVECTOR* d) {
544 void pshader_endrep(void) {
548 void pshader_if(WINED3DSHADERVECTOR* d) {
552 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
556 void pshader_else(void) {
560 void pshader_label(WINED3DSHADERVECTOR* d) {
564 void pshader_endif(void) {
568 void pshader_break(void) {
572 void pshader_breakc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
576 void pshader_breakp(WINED3DSHADERVECTOR* d) {
580 void pshader_defb(WINED3DSHADERVECTOR* d) {
584 void pshader_defi(WINED3DSHADERVECTOR* d) {
588 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
592 void pshader_dsx(WINED3DSHADERVECTOR* d) {
596 void pshader_dsy(WINED3DSHADERVECTOR* d) {
600 void pshader_texldd(WINED3DSHADERVECTOR* d) {
604 void pshader_setp(WINED3DSHADERVECTOR* d) {
608 void pshader_texldl(WINED3DSHADERVECTOR* d) {
613 void pshader_hw_map2gl(SHADER_OPCODE_ARG* arg);
616 * log, exp, frc, m*x* seem to be macro instructions ... to be checked
/* Pixel shader instruction table.
 * Each entry lists, in order: the D3DSIO_* opcode, the D3D mnemonic, a GL
 * program mnemonic (or GLNAME_REQUIRE_GLSL, NULL or "undefined"), a parameter
 * count, the software-VM handler, the hardware code generator
 * (pshader_hw_map2gl or NULL) and two shader version values.
 * NOTE(review): the SHADER_OPCODE field names are declared elsewhere; the two
 * trailing values are presumably the minimum and maximum shader version the
 * entry applies to - confirm. The table ends with an all-zero sentinel. */
618 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = {
621 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, pshader_hw_map2gl, 0, 0},
622 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, pshader_hw_map2gl, 0, 0},
623 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, pshader_hw_map2gl, 0, 0},
624 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, pshader_hw_map2gl, 0, 0},
625 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, pshader_hw_map2gl, 0, 0},
626 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, pshader_hw_map2gl, 0, 0},
627 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, pshader_hw_map2gl, 0, 0},
628 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, pshader_hw_map2gl, 0, 0},
629 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, pshader_hw_map2gl, 0, 0},
630 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, pshader_hw_map2gl, 0, 0},
631 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, pshader_hw_map2gl, 0, 0},
632 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, pshader_hw_map2gl, 0, 0},
633 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, pshader_hw_map2gl, 0, 0},
634 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, pshader_hw_map2gl, 0, 0},
635 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, pshader_hw_map2gl, 0, 0},
636 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, pshader_hw_map2gl, 0, 0},
637 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, pshader_hw_map2gl, 0, 0},
638 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, pshader_hw_map2gl, 0, 0},
639 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, pshader_hw_map2gl, 0, 0},
640 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, pshader_hw_map2gl, 0, 0},
641 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, pshader_hw_map2gl, 0, 0},
642 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, pshader_hw_map2gl, 0, 0},
643 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, NULL, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
644 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, NULL, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
645 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, NULL, 0, 0},
646 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, NULL, 0, 0},
647 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
650 MUL vec.xyz, vec, tmp;
651 but I think this is better because it accounts for w properly.
657 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, NULL, 0, 0},
658 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, NULL, 0, 0},
659 /* TODO: dp2add can be made out of multiple instructions */
/* NOTE(review): the parameter count below is 2, while the D3D dp2add
 * instruction takes a destination and three sources - confirm. */
660 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, NULL, 0, 0},
663 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, NULL, 0, 0},
664 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, NULL, 0, 0},
665 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, NULL, 0, 0},
666 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, NULL, 0, 0},
667 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, NULL, 0, 0},
669 /* Register declarations */
670 {D3DSIO_DCL, "dcl", NULL, 2, pshader_dcl, NULL, 0, 0},
672 /* Flow control - requires GLSL or software shaders */
673 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 1, pshader_rep, NULL, 0, 0},
674 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, NULL, 0, 0},
675 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 1, pshader_if, NULL, 0, 0},
676 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, NULL, 0, 0},
677 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 0, pshader_else, NULL, 0, 0},
678 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 0, pshader_endif, NULL, 0, 0},
679 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 0, pshader_break, NULL, 0, 0},
680 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, NULL, 0, 0},
681 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 1, pshader_breakp, NULL, 0, 0},
682 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, NULL, 0, 0},
683 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, NULL, 0, 0},
684 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, NULL, 0, 0},
685 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, NULL, 0, 0},
686 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, NULL, 0, 0},
687 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, NULL, 0, 0},
689 /* Constant definitions */
690 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, NULL, 0, 0},
691 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, NULL, 0, 0},
692 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, NULL, 0, 0},
695 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, NULL, 0, D3DPS_VERSION(1,3)},
696 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, NULL, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
697 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, pshader_hw_map2gl, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
698 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, NULL, 0, D3DPS_VERSION(1,3)},
699 {D3DSIO_TEX, "texld", "undefined", 2, pshader_texld, NULL, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
700 {D3DSIO_TEX, "texld", "undefined", 3, pshader_texld, NULL, D3DPS_VERSION(2,0), -1},
701 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, NULL, 0, D3DPS_VERSION(1,3)},
702 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, NULL, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
703 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, NULL, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
704 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, NULL, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
705 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, NULL, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
706 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, NULL, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
707 {D3DSIO_TEXM3x3PAD, "texm3x3pad", "undefined", 2, pshader_texm3x3pad, NULL, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
708 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, NULL, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
709 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, NULL, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
/* NOTE(review): the name string below reads "texm3x3vspe"; it looks like a
 * truncation of "texm3x3vspec" - confirm before changing, the string may be
 * emitted in trace output. */
710 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, NULL, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
711 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, NULL, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
712 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, NULL, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, NULL, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth, NULL, D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
715 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, NULL, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
716 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, NULL, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
717 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL, 1, pshader_texdepth, NULL, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
718 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, NULL, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
719 /* TODO: dp2add can be made out of multiple instructions
   NOTE(review): this TODO repeats the one above the DP2ADD entry and does not
   describe the DSX/DSY/TEXLDD/SETP/TEXLDL entries that follow. */
720 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, NULL, 0, 0},
721 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, NULL, 0, 0},
722 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, NULL, 0, 0},
723 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, NULL, 0, 0},
/* NOTE(review): the name string below reads "texdl" although the opcode and
 * handler are TEXLDL / pshader_texldl - presumably a typo for "texldl";
 * confirm before changing. */
724 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, NULL, 0, 0},
725 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, NULL, 0, 0},
/* Sentinel entry. It supplies seven values where the other rows supply eight;
 * the omitted trailing member is zero-initialised by the compiler. */
726 {0, NULL, NULL, 0, NULL, 0, 0}
/* Returns TRUE when the upper 16 bits of token are all set (0xFFFFxxxx),
 * which is how this file recognises a pixel shader version token. */
729 inline static BOOL pshader_is_version_token(DWORD token) {
730 return 0xFFFF0000 == (token & 0xFFFF0000);
/* Returns TRUE when the opcode bits of token (token & D3DSI_OPCODE_MASK)
 * equal D3DSIO_COMMENT. */
733 inline static BOOL pshader_is_comment_token(DWORD token) {
734 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
/* Writes a textual name for the register encoded in param into regstr.
 * The register number is param & D3DSP_REGNUM_MASK and the register type
 * comes from shader_get_regtype(). The visible format strings are "R%lu",
 * "fragment.color.primary" / "fragment.color.secondary", "C%lu",
 * "program.env[%lu]", "T%lu", an entry of rastout_reg_names, "oD[%lu]" and
 * "oT[%lu]"; an unhandled register type is reported with FIXME.
 *
 * NOTE(review): the switch statement, most case labels and the conditions
 * choosing between alternative names are not visible in this excerpt, so
 * the mapping from register type to format string must be confirmed
 * against the full file. In the visible lines rastout_reg_names[reg] is
 * indexed without a bounds check (the table has five entries), and regstr is
 * filled with sprintf/strcpy without a size limit, so the caller has to
 * supply a sufficiently large buffer. */
738 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
739 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
741 DWORD reg = param & D3DSP_REGNUM_MASK;
742 DWORD regtype = shader_get_regtype(param);
746 sprintf(regstr, "R%lu", reg);
750 strcpy(regstr, "fragment.color.primary");
752 strcpy(regstr, "fragment.color.secondary");
757 sprintf(regstr, "C%lu", reg);
759 sprintf(regstr, "program.env[%lu]", reg);
761 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
762 sprintf(regstr,"T%lu", reg);
765 sprintf(regstr, "%s", rastout_reg_names[reg]);
768 sprintf(regstr, "oD[%lu]", reg);
770 case D3DSPR_TEXCRDOUT:
771 sprintf(regstr, "oT[%lu]", reg);
774 FIXME("Unhandled register name Type(%ld)\n", regtype);
/* Appends a destination write-mask suffix to write_mask.
 * Nothing is appended when all four D3DSP_WRITEMASK bits are set in
 * output_reg; otherwise "." is appended, followed by "r", "g", "b" and/or "a"
 * for mask bits 0..3 that are set.
 * The text is added with strcat, so the buffer needs room for up to five more
 * characters plus the terminator.
 * NOTE(review): the line between the signature and the test is not visible in
 * this excerpt - presumably it resets write_mask to an empty string;
 * confirm. */
779 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
781 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
782 strcat(write_mask, ".");
783 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
784 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
785 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
786 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
/* Formats the source swizzle of parameter token instr into swzstring.
 * The swizzle field (instr & D3DSP_SWIZZLE_MASK, shifted down) is split into
 * four 2-bit selectors for x, y, z and w, taken from bits 0-1, 2-3, 4-5 and
 * 6-7; each selector indexes the string "rgba".
 * When the field differs from D3DSP_NOSWIZZLE the output is "." plus a single
 * character if all four selectors are equal, or "." plus four characters.
 * NOTE(review): the else between the two sprintf calls and whatever
 * initialises swzstring in the no-swizzle case are not visible in this
 * excerpt - confirm against the full file. */
790 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
791 static const char swizzle_reg_chars[] = "rgba";
792 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
793 DWORD swizzle_x = swizzle & 0x03;
794 DWORD swizzle_y = (swizzle >> 2) & 0x03;
795 DWORD swizzle_z = (swizzle >> 4) & 0x03;
796 DWORD swizzle_w = (swizzle >> 6) & 0x03;
798 * swizzle bits fields:
802 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
803 if (swizzle_x == swizzle_y &&
804 swizzle_x == swizzle_z &&
805 swizzle_x == swizzle_w) {
806 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
808 sprintf(swzstring, ".%c%c%c%c",
809 swizzle_reg_chars[swizzle_x],
810 swizzle_reg_chars[swizzle_y],
811 swizzle_reg_chars[swizzle_z],
812 swizzle_reg_chars[swizzle_w]);
/* Operand names indexed by a shift value 0..15, used by
 * gen_output_modifier_line() as the multiplier in the MUL it generates.
 * Per the inline comments, indices 1..4 stand for x2..x16 and indices 12..15
 * for d16..d2; every other index maps to the placeholder "dummy". */
817 static const char* shift_tab[] = {
818 "dummy", /* 0 (none) */
819 "coefmul.x", /* 1 (x2) */
820 "coefmul.y", /* 2 (x4) */
821 "coefmul.z", /* 3 (x8) */
822 "coefmul.w", /* 4 (x16) */
823 "dummy", /* 5 (x32) */
824 "dummy", /* 6 (x64) */
825 "dummy", /* 7 (x128) */
826 "dummy", /* 8 (d256) */
827 "dummy", /* 9 (d128) */
828 "dummy", /* 10 (d64) */
829 "dummy", /* 11 (d32) */
830 "coefdiv.w", /* 12 (d16) */
831 "coefdiv.z", /* 13 (d8) */
832 "coefdiv.y", /* 14 (d4) */
833 "coefdiv.x" /* 15 (d2) */
/* Writes into line one MUL instruction that multiplies regstr by
 * shift_tab[shift] and stores the result back into regstr restricted by
 * write_mask. "_SAT" is appended to the opcode when saturate is non-zero.
 * NOTE(review): shift indexes shift_tab (16 entries) without a range check and
 * sprintf is unbounded, so callers must pass a valid shift and a large enough
 * line buffer. */
836 inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
837 /* Generate a line that does the output modifier computation */
838 sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
/* Builds the text needed to apply the source modifier of parameter token
 * instr.
 * The register name is resolved with get_register_name(). Depending on
 * (instr & D3DSP_SRCMOD_MASK), the visible code either copies the register
 * name (optionally prefixed with "-") into outregstr, or writes one or two
 * instructions into line that compute the modified value into the temporary
 * named "T" followed by the character 'A' + tmpreg. The final sprintf puts
 * that temporary's name into outregstr.
 * regstr and tmpline are function-static buffers, so the function is not
 * reentrant.
 * NOTE(review): most case labels, the break statements, the condition guarding
 * the final substitution and the return value are not visible in this
 * excerpt; the mapping of each format string to a D3DSPSM_* modifier must be
 * confirmed against the full file. */
841 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
842 /* Generate a line that does the input modifier computation and return the input register to use */
843 static char regstr[256];
844 static char tmpline[256];
847 /* Assume a new line will be added */
850 /* Get register name */
851 get_register_name(instr, regstr, constants);
853 TRACE(" Register name %s\n", regstr);
854 switch (instr & D3DSP_SRCMOD_MASK) {
856 strcpy(outregstr, regstr);
860 sprintf(outregstr, "-%s", regstr);
864 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
866 case D3DSPSM_BIASNEG:
867 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
870 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
872 case D3DSPSM_SIGNNEG:
873 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
876 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
879 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
882 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
885 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
886 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
887 strcat(line, "\n"); /* Hack */
888 strcat(line, tmpline);
891 sprintf(line, "RCP T%c, %s.w;", 'A' + tmpreg, regstr);
892 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
893 strcat(line, "\n"); /* Hack */
894 strcat(line, tmpline);
897 strcpy(outregstr, regstr);
902 /* Substitute the register name */
903 sprintf(outregstr, "T%c", 'A' + tmpreg);
909 void pshader_set_version(
910 IWineD3DPixelShaderImpl *This,
913 DWORD major = (version >> 8) & 0x0F;
914 DWORD minor = version & 0x0F;
916 This->baseShader.hex_version = version;
917 This->baseShader.version = major * 10 + minor;
918 TRACE("ps_%lu_%lu\n", major, minor);
920 This->baseShader.limits.address = 0;
922 switch (This->baseShader.version) {
926 case 13: This->baseShader.limits.temporary = 2;
927 This->baseShader.limits.constant_float = 8;
928 This->baseShader.limits.constant_int = 0;
929 This->baseShader.limits.constant_bool = 0;
930 This->baseShader.limits.texture = 4;
933 case 14: This->baseShader.limits.temporary = 6;
934 This->baseShader.limits.constant_float = 8;
935 This->baseShader.limits.constant_int = 0;
936 This->baseShader.limits.constant_bool = 0;
937 This->baseShader.limits.texture = 6;
940 /* FIXME: temporaries must match D3DPSHADERCAPS2_0.NumTemps */
941 case 20: This->baseShader.limits.temporary = 32;
942 This->baseShader.limits.constant_float = 32;
943 This->baseShader.limits.constant_int = 16;
944 This->baseShader.limits.constant_bool = 16;
945 This->baseShader.limits.texture = 8;
948 case 30: This->baseShader.limits.temporary = 32;
949 This->baseShader.limits.constant_float = 224;
950 This->baseShader.limits.constant_int = 16;
951 This->baseShader.limits.constant_bool = 16;
952 This->baseShader.limits.texture = 0;
955 default: This->baseShader.limits.temporary = 32;
956 This->baseShader.limits.constant_float = 8;
957 This->baseShader.limits.constant_int = 0;
958 This->baseShader.limits.constant_bool = 0;
959 This->baseShader.limits.texture = 8;
960 FIXME("Unrecognized pixel shader version %lu!\n", version);
964 /* Map the opcode 1-to-1 to the GL code */
965 /* FIXME: fix CMP/CND, get rid of this switch */
966 void pshader_hw_map2gl(SHADER_OPCODE_ARG* arg) {
968 IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
969 CONST SHADER_OPCODE* curOpcode = arg->opcode;
970 SHADER_BUFFER* buffer = arg->buffer;
971 DWORD dst = arg->dst;
972 DWORD* src = arg->src;
976 char output_rname[256];
977 char output_wmask[20];
978 BOOL saturate = FALSE;
981 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
982 strcpy(tmpLine, curOpcode->glname);
984 /* Process modifiers */
985 if (0 != (dst & D3DSP_DSTMOD_MASK)) {
986 DWORD mask = dst & D3DSP_DSTMOD_MASK;
988 case D3DSPDM_SATURATE: saturate = TRUE; break;
989 #if 0 /* as yet unhandled modifiers */
990 case D3DSPDM_CENTROID: centroid = TRUE; break;
991 case D3DSPDM_PP: partialpresision = TRUE; break;
994 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
997 shift = (dst & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
999 /* Generate input and output registers */
1000 if (curOpcode->num_params > 0) {
1002 char operands[4][100];
1006 /* Generate lines that handle input modifier computation */
1007 for (i = 1; i < curOpcode->num_params; ++i) {
1008 TRACE("(%p) : Param %u token %lx\n", This, i, src[i - 1]);
1009 if (gen_input_modifier_line(src[i - 1], i - 1, regs[i - 1], tmpOp, This->constants))
1010 shader_addline(buffer, tmpOp);
1013 /* Handle output register */
1014 get_register_name(dst, output_rname, This->constants);
1015 strcpy(operands[0], output_rname);
1016 get_write_mask(dst, output_wmask);
1017 strcat(operands[0], output_wmask);
1019 /* This function works because of side effects from gen_input_modifier_line */
1020 /* Handle input registers */
1021 for (i = 1; i < curOpcode->num_params; ++i) {
1022 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1023 strcpy(operands[i], regs[i - 1]);
1024 get_input_register_swizzle(src[i - 1], swzstring);
1025 strcat(operands[i], swzstring);
1028 switch(curOpcode->opcode) {
1030 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""),
1031 operands[0], operands[1], operands[3], operands[2]);
1034 shader_addline(buffer, "ADD TMP, -%s, coefdiv.x;\n", operands[1]);
1035 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""),
1036 operands[0], operands[2], operands[3]);
1040 if (saturate && (shift == 0))
1041 strcat(tmpLine, "_SAT");
1042 strcat(tmpLine, " ");
1043 strcat(tmpLine, operands[0]);
1044 for (i = 1; i < curOpcode->num_params; i++) {
1045 strcat(tmpLine, ", ");
1046 strcat(tmpLine, operands[i]);
1048 strcat(tmpLine,";\n");
1050 shader_addline(buffer, tmpLine);
1052 /* A shift requires another line. */
1054 gen_output_modifier_line(saturate, output_wmask, shift, output_rname, tmpLine);
1055 shader_addline(buffer, tmpLine);
/* Translate a D3D pixel shader token stream into ARB fragment program text
 * and hand it to OpenGL.
 *
 * iface     - pixel shader being compiled; This->constants and
 *             This->baseShader.prgId are written
 * pFunction - token stream to translate
 *
 * The program text starts with "!!ARBfp1.0" and ends with "END". It is built
 * in a heap buffer of SHADER_PGMSIZE bytes that is freed before returning.
 * If glProgramStringARB reports GL_INVALID_OPERATION the error position and
 * string are logged and This->baseShader.prgId is set to -1.
 */
1060 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
1061 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1062 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1063 const DWORD *pToken = pFunction;
1064 const SHADER_OPCODE *curOpcode = NULL;
1065 const DWORD *pInstr;
1068 #if 0 /* TODO: loop register (just another address register ) */
1069 BOOL hasLoops = FALSE;
1071 SHADER_BUFFER buffer;
1073 int row = 0; /* not sure, something to do with macros? */
1075 int version = This->baseShader.version;
1077 /* Keep bitmaps of used temporary and texture registers */
1078 DWORD tempsUsed, texUsed;
1080 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
1081 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
1082 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
1083 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
1084 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
1085 This->fixupVertexBufferSize = SHADER_PGMSIZE;
1086 This->fixupVertexBuffer[0] = 0;
1088 buffer.buffer = This->device->fixupVertexBuffer;
/* NOTE(review): no NULL check on this allocation is visible before the first shader_addline call - confirm */
1090 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
1095 /* FIXME: if jumps are used, use GLSL, else use ARB_fragment_program */
1096 shader_addline(&buffer, "!!ARBfp1.0\n");
1098 /* TODO: Think about using a first pass to work out what's required for the second pass. */
/* Clear the per-constant flags; a DEF instruction sets its entry to 1 further down */
1099 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
1100 This->constants[i] = 0;
1102 /* First pass: figure out which temporary and texture registers are used */
1103 shader_get_registers_used((IWineD3DBaseShader*) This, pToken, &tempsUsed, &texUsed);
1104 TRACE("Texture registers used: %#lx, Temp registers used %#lx\n", texUsed, tempsUsed);
1106 /* TODO: check register usage against GL/Directx limits, and fail if they're exceeded */
1108 /* Pre-declare registers */
/* Only registers whose bit is set in texUsed / tempsUsed get a TEMP declaration */
1109 for(i = 0; i < This->baseShader.limits.texture; i++) {
1110 if (texUsed & (1 << i))
1111 shader_addline(&buffer,"TEMP T%lu;\n", i);
1114 for(i = 0; i < This->baseShader.limits.temporary; i++) {
1115 if (tempsUsed & (1 << i))
1116 shader_addline(&buffer, "TEMP R%lu;\n", i);
1119 /* Necessary for internal operations */
/* TA, TB and TC are the "T<letter>" scratch names produced by gen_input_modifier_line;
 * coefdiv, coefmul and one are the PARAMs the modifier lines refer to */
1120 shader_addline(&buffer, "TEMP TMP;\n");
1121 shader_addline(&buffer, "TEMP TMP2;\n");
1122 shader_addline(&buffer, "TEMP TA;\n");
1123 shader_addline(&buffer, "TEMP TB;\n");
1124 shader_addline(&buffer, "TEMP TC;\n");
1125 shader_addline(&buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1126 shader_addline(&buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1127 shader_addline(&buffer, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1129 /* Texture coordinate registers must be pre-loaded */
1130 for (i = 0; i < This->baseShader.limits.texture; i++) {
1131 if (texUsed & (1 << i))
1132 shader_addline(&buffer, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1135 /* Second pass, process opcodes */
1136 if (NULL != pToken) {
1137 while (D3DPS_END() != *pToken) {
1138 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1140 instructionSize = pToken & SIZEBITS >> 27;
1144 /* Skip version token */
1145 if (pshader_is_version_token(*pToken)) {
1150 /* Skip comment tokens */
1151 if (pshader_is_comment_token(*pToken)) {
/* comment_len is the number of DWORDs taken from the comment token's size field */
1152 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
/* The comment payload is traced as a C string */
1154 TRACE("#%s\n", (char*)pToken);
1155 pToken += comment_len;
1159 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1163 curOpcode = shader_get_opcode((IWineD3DBaseShader*) This, *pToken);
1165 if (NULL == curOpcode) {
1166 /* unknown current opcode ... (shouldn't be any!) */
1167 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1168 FIXME("unrecognized opcode: %08lx\n", *pToken);
1171 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1172 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1173 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1174 pToken += curOpcode->num_params;
1176 } else if (D3DSIO_DEF == curOpcode->opcode) {
1178 /* Handle definitions here, they don't fit well with the
1179 * other instructions below [for now ] */
1181 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1183 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1184 curOpcode->name, curOpcode->glname, curOpcode->num_params);
/* The four DWORDs after the register token are read as floats */
1186 shader_addline(&buffer,
1187 "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1188 *((const float *)(pToken + 1)),
1189 *((const float *)(pToken + 2)),
1190 *((const float *)(pToken + 3)),
1191 *((const float *)(pToken + 4)) );
1193 This->constants[reg] = 1;
1197 /* If a generator function is set, use it */
1198 } else if (curOpcode->hw_fct != NULL) {
1200 SHADER_OPCODE_ARG hw_arg;
1202 hw_arg.shader = (IWineD3DBaseShader*) This;
1203 hw_arg.opcode = curOpcode;
1204 hw_arg.buffer = &buffer;
/* The first parameter token is the destination, the remaining ones are sources */
1205 if (curOpcode->num_params > 0) {
1206 hw_arg.dst = *pToken;
1208 /* FIXME: this does not account for relative address tokens */
1209 for (i = 1; i < curOpcode->num_params; i++)
1210 hw_arg.src[i-1] = *(pToken + i);
1213 curOpcode->hw_fct(&hw_arg);
1214 pToken += curOpcode->num_params;
1218 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1219 curOpcode->name, curOpcode->glname, curOpcode->num_params);
1221 /* Build opcode for GL vertex_program */
1222 switch (curOpcode->opcode) {
1230 char reg_coord_swz[20] = "";
1231 DWORD reg_dest_code;
1232 DWORD reg_sampler_code;
1234 /* All versions have a destination register */
1235 reg_dest_code = *pToken & D3DSP_REGNUM_MASK;
1236 get_register_name(*pToken++, reg_dest, This->constants);
1238 /* 1.0-1.3: Use destination register as coordinate source. No modifiers.
1239 1.4: Use provided coordinate source register. _dw, _dz, swizzle allowed.
1240 2.0+: Use provided coordinate source register. No modifiers.
1241 3.0+: Use provided coordinate source register. Swizzle allowed */
1243 strcpy(reg_coord, reg_dest);
1245 else if (version == 14) {
1246 if (gen_input_modifier_line(*pToken, 0, reg_coord, tmpLine, This->constants))
1247 shader_addline(&buffer, tmpLine);
1248 get_input_register_swizzle(*pToken, reg_coord_swz);
1251 else if (version > 14 && version < 30) {
1252 get_register_name(*pToken, reg_coord, This->constants);
1255 else if (version >= 30) {
1256 get_input_register_swizzle(*pToken, reg_coord_swz);
1257 get_register_name(*pToken, reg_coord, This->constants);
1261 /* 1.0-1.4: Use destination register number as texture code.
1262 2.0+: Use provided sampler number as texure code. */
1264 reg_sampler_code = reg_dest_code;
1267 reg_sampler_code = *pToken & D3DSP_REGNUM_MASK;
/* The sample is always emitted with a 2D texture target */
1271 shader_addline(&buffer, "TEX %s, %s%s, texture[%lu], 2D;\n",
1272 reg_dest, reg_coord, reg_coord_swz, reg_sampler_code);
1275 case D3DSIO_TEXCOORD:
1278 get_write_mask(*pToken, tmp);
/* Every version except 1.4 copies into T<reg>; 1.4 copies into R<reg1> from a second token's register number */
1279 if (version != 14) {
1280 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1281 shader_addline(&buffer, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1284 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1285 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1286 shader_addline(&buffer, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1291 case D3DSIO_TEXM3x2PAD:
1293 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1295 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1296 shader_addline(&buffer, tmpLine);
1297 shader_addline(&buffer, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1301 case D3DSIO_TEXM3x2TEX:
1303 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1305 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1306 shader_addline(&buffer, tmpLine);
1307 shader_addline(&buffer, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1308 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1312 case D3DSIO_TEXREG2AR:
1314 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1315 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
/* Alpha and red of T<reg2> become the first two lookup coordinates */
1316 shader_addline(&buffer, "MOV TMP.r, T%lu.a;\n", reg2);
1317 shader_addline(&buffer, "MOV TMP.g, T%lu.r;\n", reg2);
1318 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1322 case D3DSIO_TEXREG2GB:
1324 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1325 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
/* Green and blue of T<reg2> become the first two lookup coordinates */
1326 shader_addline(&buffer, "MOV TMP.r, T%lu.g;\n", reg2);
1327 shader_addline(&buffer, "MOV TMP.g, T%lu.b;\n", reg2);
1328 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1334 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1335 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1337 /* FIXME: Should apply the BUMPMAPENV matrix */
1338 shader_addline(&buffer, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1339 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1343 case D3DSIO_TEXM3x3PAD:
1345 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1347 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1348 shader_addline(&buffer, tmpLine);
/* 'row' selects the TMP component (x, y, ...) that receives this dot product */
1349 shader_addline(&buffer, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1354 case D3DSIO_TEXM3x3TEX:
1356 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1358 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1359 shader_addline(&buffer, tmpLine);
1360 shader_addline(&buffer, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1362 /* Cubemap textures will be more used than 3D ones. */
1363 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1368 case D3DSIO_TEXM3x3VSPEC:
1370 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1372 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1373 shader_addline(&buffer, tmpLine);
1374 shader_addline(&buffer, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1376 /* Construct the eye-ray vector from w coordinates */
1377 shader_addline(&buffer, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1378 shader_addline(&buffer, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1379 shader_addline(&buffer, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1381 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1382 shader_addline(&buffer, "DP3 TMP.w, TMP, TMP2;\n");
1383 shader_addline(&buffer, "MUL TMP, TMP.w, TMP;\n");
1384 shader_addline(&buffer, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1386 /* Cubemap textures will be more used than 3D ones. */
1387 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1392 case D3DSIO_TEXM3x3SPEC:
1394 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1395 DWORD reg3 = *(pToken + 2) & D3DSP_REGNUM_MASK;
1397 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants))
1398 shader_addline(&buffer, tmpLine);
1399 shader_addline(&buffer, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1401 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1402 shader_addline(&buffer, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1403 shader_addline(&buffer, "MUL TMP, TMP.w, TMP;\n");
1404 shader_addline(&buffer, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1406 /* Cubemap textures will be more used than 3D ones. */
1407 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1414 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1415 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1417 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1419 pToken += curOpcode->num_params;
1424 /* TODO: What about result.depth? */
/* R0 is written out as the fragment colour */
1425 shader_addline(&buffer, "MOV result.color, R0;\n");
1426 shader_addline(&buffer, "END\n");
1429 /* finally null terminate the buffer */
1430 buffer.buffer[buffer.bsize] = 0;
/* NOTE(review): the program below is bound as GL_FRAGMENT_PROGRAM_ARB, yet this guard
 * tests ARB_VERTEX_PROGRAM - confirm whether ARB_FRAGMENT_PROGRAM was intended */
1431 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1432 /* Create the hw shader */
1434 /* The program string sometimes gets too long for a normal TRACE */
1435 TRACE("Generated program:\n");
1436 if (TRACE_ON(d3d_shader)) {
1437 fprintf(stderr, "%s\n", buffer.buffer);
1440 /* TODO: change to resource.glObjectHandel or something like that */
1441 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
1443 TRACE("Creating a hw pixel shader, prg=%d\n", This->baseShader.prgId);
1444 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->baseShader.prgId));
1446 TRACE("Created hw pixel shader, prg=%d\n", This->baseShader.prgId);
1447 /* Create the program and check for errors */
1448 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
1449 buffer.bsize, buffer.buffer));
/* On GL_INVALID_OPERATION the error position and string are logged and prgId is set to -1 */
1451 if (glGetError() == GL_INVALID_OPERATION) {
1453 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1454 FIXME("HW PixelShader Error at position %d: %s\n",
1455 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1456 This->baseShader.prgId = -1;
1459 #if 1 /* if were using the data buffer of device then we don't need to free it */
1460 HeapFree(GetProcessHeap(), 0, buffer.buffer);
1464 inline static void pshader_program_dump_ins_modifiers(const DWORD output) {
1466 DWORD shift = (output & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1467 DWORD mmask = output & D3DSP_DSTMOD_MASK;
1471 case 13: TRACE("_d8"); break;
1472 case 14: TRACE("_d4"); break;
1473 case 15: TRACE("_d2"); break;
1474 case 1: TRACE("_x2"); break;
1475 case 2: TRACE("_x4"); break;
1476 case 3: TRACE("_x8"); break;
1477 default: TRACE("_unhandled_shift(%ld)", shift); break;
1481 case D3DSPDM_NONE: break;
1482 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1483 case D3DSPDM_PARTIALPRECISION: TRACE("_pp"); break;
1484 case D3DSPDM_MSAMPCENTROID: TRACE("_centroid"); break;
1485 default: TRACE("_unhandled_modifier(%#lx)", mmask); break;
/* Trace one shader parameter token in assembler-like notation.
 *
 * param - parameter token; register number, register type, write mask,
 *         source modifier and swizzle are all decoded from it
 * input - selects between the "operand output" handling (write mask) and the
 *         "operand input" handling (source modifier suffix and swizzle)
 *
 * The register is printed with a prefix that depends on its type, followed by
 * the write mask or by the modifier suffix and swizzle.
 */
1489 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1490 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1491 static const char swizzle_reg_chars[] = "rgba";
1493 DWORD reg = param & D3DSP_REGNUM_MASK;
1494 DWORD regtype = shader_get_regtype(param);
/* Negating modifiers and the complement modifier are tested before the register is printed */
1497 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1498 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1499 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1500 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1502 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1514 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1517 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1520 case D3DSPR_RASTOUT:
/* NOTE(review): reg indexes the five-entry name table with no range check on this line - confirm callers */
1521 TRACE("%s", rastout_reg_names[reg]);
1523 case D3DSPR_ATTROUT:
1524 TRACE("oD%lu", reg);
1526 case D3DSPR_TEXCRDOUT:
1527 TRACE("oT%lu", reg);
1529 case D3DSPR_CONSTINT:
1530 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1532 case D3DSPR_CONSTBOOL:
1533 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1539 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1541 case D3DSPR_SAMPLER:
1545 TRACE("unhandled_rtype(%lx)", regtype);
1550 /* operand output (for modifiers and shift, see dump_ins_modifiers) */
/* A write mask is only printed when it does not cover all four components */
1552 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1554 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1555 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1556 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1557 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1560 /** operand input */
/* The swizzle holds four 2-bit selectors, one per component, lowest bits first */
1561 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1562 DWORD swizzle_r = swizzle & 0x03;
1563 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1564 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1565 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1567 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1568 DWORD mask = param & D3DSP_SRCMOD_MASK;
1569 /*TRACE("_modifier(0x%08lx) ", mask);*/
/* The negated variants print the same suffix as their positive counterparts */
1571 case D3DSPSM_NONE: break;
1572 case D3DSPSM_NEG: break;
1573 case D3DSPSM_BIAS: TRACE("_bias"); break;
1574 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1575 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1576 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1577 case D3DSPSM_COMP: break;
1578 case D3DSPSM_X2: TRACE("_x2"); break;
1579 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1580 case D3DSPSM_DZ: TRACE("_dz"); break;
1581 case D3DSPSM_DW: TRACE("_dw"); break;
1583 TRACE("_unknown(0x%08lx)", mask);
1588 * swizzle bits fields:
/* When all four selectors agree a single component letter is printed */
1591 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1592 if (swizzle_r == swizzle_g &&
1593 swizzle_r == swizzle_b &&
1594 swizzle_r == swizzle_a) {
1595 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1598 swizzle_reg_chars[swizzle_r],
1599 swizzle_reg_chars[swizzle_g],
1600 swizzle_reg_chars[swizzle_b],
1601 swizzle_reg_chars[swizzle_a]);
1607 inline static void pshader_program_dump_decl_usage(
1608 IWineD3DPixelShaderImpl *This, DWORD decl, DWORD param) {
1610 DWORD regtype = shader_get_regtype(param);
1613 if (regtype == D3DSPR_SAMPLER) {
1614 DWORD ttype = decl & D3DSP_TEXTURETYPE_MASK;
1617 case D3DSTT_2D: TRACE("2d "); break;
1618 case D3DSTT_CUBE: TRACE("cube "); break;
1619 case D3DSTT_VOLUME: TRACE("volume "); break;
1620 default: TRACE("unknown_ttype(%08lx) ", ttype);
1625 DWORD usage = decl & D3DSP_DCL_USAGE_MASK;
1626 DWORD idx = (decl & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
1629 case D3DDECLUSAGE_POSITION:
1630 TRACE("%s%ld ", "position", idx);
1632 case D3DDECLUSAGE_BLENDINDICES:
1633 TRACE("%s ", "blend");
1635 case D3DDECLUSAGE_BLENDWEIGHT:
1636 TRACE("%s ", "weight");
1638 case D3DDECLUSAGE_NORMAL:
1639 TRACE("%s%ld ", "normal", idx);
1641 case D3DDECLUSAGE_PSIZE:
1642 TRACE("%s ", "psize");
1644 case D3DDECLUSAGE_COLOR:
1646 TRACE("%s ", "color");
1648 TRACE("%s%ld ", "specular", (idx - 1));
1651 case D3DDECLUSAGE_TEXCOORD:
1652 TRACE("%s%ld ", "texture", idx);
1654 case D3DDECLUSAGE_TANGENT:
1655 TRACE("%s ", "tangent");
1657 case D3DDECLUSAGE_BINORMAL:
1658 TRACE("%s ", "binormal");
1660 case D3DDECLUSAGE_TESSFACTOR:
1661 TRACE("%s ", "tessfactor");
1663 case D3DDECLUSAGE_POSITIONT:
1664 TRACE("%s%ld ", "positionT", idx);
1666 case D3DDECLUSAGE_FOG:
1667 TRACE("%s ", "fog");
1669 case D3DDECLUSAGE_DEPTH:
1670 TRACE("%s ", "depth");
1672 case D3DDECLUSAGE_SAMPLE:
1673 TRACE("%s ", "sample");
1676 FIXME("Unrecognised dcl %08lx", usage);
/* Install a pixel shader function.
 *
 * iface     - pixel shader object
 * pFunction - shader token stream, may be NULL
 *
 * The token stream is walked once to trace a disassembly and to record its
 * length in This->baseShader.functionLength. A hardware program is then
 * generated when the settings check below passes, and finally a private copy
 * of the tokens is stored in This->baseShader.function (NULL when pFunction
 * is NULL).
 */
1681 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1682 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1683 const DWORD* pToken = pFunction;
1684 const SHADER_OPCODE *curOpcode = NULL;
1687 TRACE("(%p) : Parsing programme\n", This);
1689 if (NULL != pToken) {
1690 while (D3DPS_END() != *pToken) {
/* The version token sets the shader version and the register limits */
1691 if (pshader_is_version_token(*pToken)) { /** version */
1692 pshader_set_version(This, *pToken);
1697 if (pshader_is_comment_token(*pToken)) { /** comment */
1698 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
/* The comment payload is traced as a C string; len is advanced by the payload plus the comment token */
1700 TRACE("//%s\n", (char*)pToken);
1701 pToken += comment_len;
1702 len += comment_len + 1;
/* An instruction seen while baseShader.version is still zero only triggers a warning */
1705 if (!This->baseShader.version) {
1706 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1708 curOpcode = shader_get_opcode((IWineD3DBaseShader*) This, *pToken);
1711 if (NULL == curOpcode) {
1713 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1714 while (*pToken & 0x80000000) {
1716 /* unknown current opcode ... */
1717 TRACE("unrecognized opcode: %08lx", *pToken);
/* DCL: the usage token is followed by the register token it applies to */
1724 if (curOpcode->opcode == D3DSIO_DCL) {
1725 pshader_program_dump_decl_usage(This, *pToken, *(pToken + 1));
1728 pshader_program_dump_ps_param(*pToken, 0);
/* DEF: a constant register number followed by four values traced as floats */
1732 if (curOpcode->opcode == D3DSIO_DEF) {
1733 TRACE("def c%lu = ", *pToken & 0xFF);
1736 TRACE("%f ,", *(float *)pToken);
1739 TRACE("%f ,", *(float *)pToken);
1742 TRACE("%f ,", *(float *)pToken);
1745 TRACE("%f", *(float *)pToken);
/* Any other opcode: name, destination modifiers, destination, then the sources */
1749 TRACE("%s", curOpcode->name);
1750 if (curOpcode->num_params > 0) {
1751 pshader_program_dump_ins_modifiers(*pToken);
1753 pshader_program_dump_ps_param(*pToken, 0);
1756 for (i = 1; i < curOpcode->num_params; ++i) {
1758 pshader_program_dump_ps_param(*pToken, 1);
/* The extra token added to len presumably accounts for the end token - TODO confirm */
1767 This->baseShader.functionLength = (len + 1) * sizeof(DWORD);
1769 This->baseShader.functionLength = 1; /* no Function defined use fixed function vertex processing */
1772 /* Generate HW shader if needed */
/* NOTE(review): this pixel shader path is gated on wined3d_settings.vs_mode / VS_HW -
 * confirm whether a pixel-shader-specific setting was intended */
1773 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1774 TRACE("(%p) : Generating hardware program\n", This);
1776 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1780 TRACE("(%p) : Copying the function\n", This);
1781 /* copy the function ... because it will certainly be released by application */
1782 if (NULL != pFunction) {
/* NOTE(review): the allocation result is handed to memcpy on the next line without a NULL check */
1783 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1784 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1786 This->baseShader.function = NULL;
1789 /* TODO: Some proper return values for failures */
1790 TRACE("(%p) : Returning WINED3D_OK\n", This);
/* Method table for the IWineD3DPixelShader implementation. The entries are
 * grouped by the interface that declares them, as the section comments
 * below indicate. */
1794 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1796 /*** IUnknown methods ***/
1797 IWineD3DPixelShaderImpl_QueryInterface,
1798 IWineD3DPixelShaderImpl_AddRef,
1799 IWineD3DPixelShaderImpl_Release,
1800 /*** IWineD3DBase methods ***/
1801 IWineD3DPixelShaderImpl_GetParent,
1802 /*** IWineD3DBaseShader methods ***/
1803 IWineD3DPixelShaderImpl_SetFunction,
1804 /*** IWineD3DPixelShader methods ***/
1805 IWineD3DPixelShaderImpl_GetDevice,
1806 IWineD3DPixelShaderImpl_GetFunction