2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
/* Shorthand for the gl_info member of the WineD3D object that owns the device.
 * The expansion dereferences This->wineD3DDevice, so it can only be used where
 * a local variable named This is in scope. */
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
/* Tracing helpers for the software VM functions below. The definitions that
 * forward to TRACE sit under "#if 0", i.e. they are compiled out. */
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
37 # define TRACE_VSVECTOR(name)
40 /* The maximum size of the program */
/* Mask applied to a parameter token to extract the register number
 * (see get_register_name: reg = param & REGMASK). */
43 #define REGMASK 0x00001FFF
/* Sentinel stored in the GL-name column of the opcode table; it is a marker
 * value, not a pointer to a readable string, and must never be printed or copied. */
45 #define GLNAME_REQUIRE_GLSL ((const char *)1)
46 /* *******************************************
47 IWineD3DPixelShader IUnknown parts follow
48 ******************************************* */
/* IUnknown::QueryInterface for the pixel shader object.
 *
 * When riid is IID_IUnknown, IID_IWineD3DBase, IID_IWineD3DBaseShader or
 * IID_IWineD3DPixelShader a reference is taken on iface.
 *
 * NOTE(review): the statements that store *ppobj and return the HRESULT are
 * not visible in this excerpt - confirm that *ppobj is written on success and
 * set to NULL when the interface is not supported. */
49 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
51 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
52 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
/* All four interface IDs are served by the same object. */
53 if (IsEqualGUID(riid, &IID_IUnknown)
54 || IsEqualGUID(riid, &IID_IWineD3DBase)
55 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
56 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
57 IUnknown_AddRef(iface);
64 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
65 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
66 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
67 return InterlockedIncrement(&This->ref);
/* IUnknown::Release for the pixel shader object.
 *
 * Atomically decrements the reference count and keeps the new value in ref.
 * The object memory is returned to the process heap with HeapFree.
 *
 * NOTE(review): the declaration of ref, the condition guarding HeapFree and
 * the return statement are not visible in this excerpt - presumably the free
 * happens only when the count reaches zero; confirm. */
70 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
71 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
/* The trace shows the count before the decrement. */
73 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
74 ref = InterlockedDecrement(&This->ref);
76 HeapFree(GetProcessHeap(), 0, This);
81 /* TODO: At the moment the function parser is single pass; it achieves this
82 by passing constants to a couple of functions where they are then modified.
83 At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
84 when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
87 /* *******************************************
88 IWineD3DPixelShader IWineD3DPixelShader parts follow
89 ******************************************* */
/* Hands out the parent object of the pixel shader.
 *
 * parent: receives This->parent; a reference is added on the returned object,
 *         so the caller owns that reference.
 *
 * NOTE(review): the return statement is not visible in this excerpt. */
91 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
92 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
94 *parent = This->parent;
95 IUnknown_AddRef(*parent);
96 TRACE("(%p) : returning %p\n", This, *parent);
/* Hands out the device that the pixel shader belongs to.
 *
 * pDevice: receives This->wineD3DDevice; a reference is added on the device
 *          before it is stored, so the caller owns that reference.
 *
 * NOTE(review): the return statement is not visible in this excerpt. */
100 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
101 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
102 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
103 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
104 TRACE("(%p) returning %p\n", This, *pDevice);
/* Copies the shader byte code (baseShader.function) to the caller.
 *
 * pData:       destination buffer.
 * pSizeOfData: in: size of pData in bytes; out: set to
 *              baseShader.functionLength on the paths shown below.
 *
 * Returns WINED3DERR_MOREDATA when *pSizeOfData is smaller than the stored
 * function length (the required size is written back first).
 *
 * NOTE(review): several lines (the condition in front of the first size
 * store, the else branches and the final return) are not visible in this
 * excerpt - presumably the first store is the "pData == NULL, query size
 * only" case; confirm. */
109 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
110 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
/* Logged at FIXME level on every call. */
111 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
114 *pSizeOfData = This->baseShader.functionLength;
117 if (*pSizeOfData < This->baseShader.functionLength) {
118 *pSizeOfData = This->baseShader.functionLength;
119 return WINED3DERR_MOREDATA;
121 if (NULL == This->baseShader.function) { /* no function defined */
122 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
/* NOTE(review): this stores a pointer-sized NULL into the caller's buffer; the
 * only size check above compares against functionLength, so verify the buffer
 * is guaranteed to be at least sizeof(DWORD *) bytes on this path. */
123 (*(DWORD **) pData) = NULL;
125 if (This->baseShader.functionLength == 0) {
128 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
129 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
134 /*******************************
135 * pshader functions software VM
138 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
139 d->x = s0->x + s1->x;
140 d->y = s0->y + s1->y;
141 d->z = s0->z + s1->z;
142 d->w = s0->w + s1->w;
143 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
144 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
147 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
148 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
149 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
150 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
153 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
154 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
155 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
156 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM 'dst'.
 * Visible here: d->y is the product of the y components of s0 and s1.
 * NOTE(review): the assignments to d->x, d->z and d->w are not visible in
 * this excerpt - check them against the D3D 'dst' definition. */
159 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
161 d->y = s0->y * s1->y;
164 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
165 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM 'expp' (partial-precision 2^x), driven by s0->w only.
 *   d->x = 2^floor(w)
 *   d->y = w - floor(w)   (fractional part)
 * tmp is accessed both as a float (tmp.f) and as raw bits (tmp.d); the mask
 * 0xFFFFFF00 clears the low 8 bits of the representation of 2^w.
 * NOTE(review): the declaration of tmp and the stores to d->z / d->w are not
 * visible in this excerpt. The trace text says "exp", not "expp". */
168 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
174 tmp.f = floorf(s0->w);
175 d->x = powf(2.0f, tmp.f);
176 d->y = s0->w - tmp.f;
177 tmp.f = powf(2.0f, s0->w);
178 tmp.d &= 0xFFFFFF00U;
181 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
182 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM 'lit' (lighting coefficients).
 *   d->y = max(s0->x, 0)
 *   d->z = s0->y ^ s0->w when both s0->x and s0->y are positive, else 0
 * NOTE(review): the assignments to d->x and d->w are not visible in this
 * excerpt. */
185 void pshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
187 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
188 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
190 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
191 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
194 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
195 float tmp_f = fabsf(s0->w);
196 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
197 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
198 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
201 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
202 d->x = s0->x * s1->x + s2->x;
203 d->y = s0->y * s1->y + s2->y;
204 d->z = s0->z * s1->z + s2->z;
205 d->w = s0->w * s1->w + s2->w;
206 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
207 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
210 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
211 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
212 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
213 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
214 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
215 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
216 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
219 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
220 d->x = (s0->x < s1->x) ? s0->x : s1->x;
221 d->y = (s0->y < s1->y) ? s0->y : s1->y;
222 d->z = (s0->z < s1->z) ? s0->z : s1->z;
223 d->w = (s0->w < s1->w) ? s0->w : s1->w;
224 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
225 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM 'mov'.
 * NOTE(review): the statements of the body are not visible in this excerpt;
 * only the trace of s0 and d is shown. Presumably d receives a copy of s0. */
228 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
233 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
234 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
237 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
238 d->x = s0->x * s1->x;
239 d->y = s0->y * s1->y;
240 d->z = s0->z * s1->z;
241 d->w = s0->w * s1->w;
242 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
243 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM 'nop': does nothing apart from the optional trace. */
void pshader_nop(void)
{
    PSTRACE(("executing nop\n"));
}
251 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
252 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
253 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
254 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
257 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
258 float tmp_f = fabsf(s0->w);
259 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
260 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
261 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
264 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
265 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
266 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
267 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
268 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
269 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
270 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
273 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
274 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
275 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
276 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
277 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
278 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
279 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
282 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
283 d->x = s0->x - s1->x;
284 d->y = s0->y - s1->y;
285 d->z = s0->z - s1->z;
286 d->w = s0->w - s1->w;
287 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
288 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
292 * Version 1.1 specific
295 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
296 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
297 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
298 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
301 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
302 float tmp_f = fabsf(s0->w);
303 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
304 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
305 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM 'frc': fractional part, computed as value - floor(value).
 * NOTE(review): only the x and y components are visible in this excerpt;
 * the handling of d->z and d->w is not shown. */
308 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
309 d->x = s0->x - floorf(s0->x);
310 d->y = s0->y - floorf(s0->y);
313 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
314 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Fixed-size FLOAT matrices used by the m*x* software VM functions. The two
 * digits in each name are the array dimensions: D3DMATRIXrc is FLOAT[r][c].
 * Note that pshader_m4x3 takes a D3DMATRIX34 and pshader_m3x4 takes a
 * D3DMATRIX43, i.e. the digits are swapped relative to the opcode name. */
317 typedef FLOAT D3DMATRIX44[4][4];
318 typedef FLOAT D3DMATRIX43[4][3];
319 typedef FLOAT D3DMATRIX34[3][4];
320 typedef FLOAT D3DMATRIX33[3][3];
321 typedef FLOAT D3DMATRIX23[2][3];
323 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
325 * Buggy CODE: here only if cast not work for copy/paste
326 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
327 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
328 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
329 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
330 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
331 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
332 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
334 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
335 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
336 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
337 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
338 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
339 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
340 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
341 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
/* Software VM 'm4x3': d->x, d->y and d->z are the dot products of rows 0..2
 * of mat (a FLOAT[3][4]) with the four components of s0.
 * Each component is stored before the next row is evaluated, so d must not
 * alias s0.
 * NOTE(review): a line between the d->z store and the traces is not visible
 * in this excerpt; how d->w is set is therefore not shown. */
344 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
345 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
346 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
347 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
349 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
350 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
351 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
352 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
355 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
356 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
357 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
358 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
359 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
360 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
361 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
362 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
363 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
/* Software VM 'm3x3': d->x, d->y and d->z are the dot products of rows 0..2
 * of the 3x3 matrix mat with (s0->x, s0->y, s0->z).
 * Each component is stored before the next row is evaluated, so d must not
 * alias s0.
 * NOTE(review): a line between the d->z store and the traces is not visible
 * in this excerpt; how d->w is set is therefore not shown. */
366 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
367 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
368 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
369 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
371 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
372 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
373 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
374 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
/* Software VM 'm3x2': d->x and d->y are the dot products of rows 0 and 1 of
 * mat (a FLOAT[2][3]) with (s0->x, s0->y, s0->z).
 * NOTE(review): the lines before and after these two stores are not visible
 * in this excerpt, so the handling of d->z and d->w is not shown. */
377 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
379 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
380 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
386 * Version 2.0 specific
388 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
389 d->x = s0->x * (s1->x - s2->x) + s2->x;
390 d->y = s0->y * (s1->y - s2->y) + s2->y;
391 d->z = s0->z * (s1->z - s2->z) + s2->z;
392 d->w = s0->w * (s1->w - s2->w) + s2->w;
395 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
396 d->x = s0->y * s1->z - s0->z * s1->y;
397 d->y = s0->z * s1->x - s0->x * s1->z;
398 d->z = s0->x * s1->y - s0->y * s1->x;
399 d->w = 0.9f; /* w is undefined, so set it to something safeish */
401 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
402 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM 'abs'.
 * NOTE(review): the statements of the body are not visible in this excerpt;
 * only the trace of s0 and d is shown. Presumably d receives the
 * per-component absolute value of s0. */
405 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
410 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
411 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM entry points for the texture, comparison, flow-control and
 * later-shader-model opcodes. Each function below is referenced by name from
 * the IWineD3DPixelShaderImpl_shader_ins opcode table.
 * NOTE(review): the bodies are not visible in this excerpt - presumably they
 * are unimplemented stubs; confirm before relying on any of them. */
415 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
419 void pshader_texkill(WINED3DSHADERVECTOR* d) {
423 void pshader_tex(WINED3DSHADERVECTOR* d) {
426 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
430 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
434 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
438 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
442 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
446 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
450 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
454 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
458 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
462 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
466 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
470 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
474 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
478 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
479 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
483 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
487 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
491 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
495 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
499 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
503 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
507 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
511 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
515 void pshader_call(WINED3DSHADERVECTOR* d) {
519 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
523 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
527 void pshader_ret(void) {
531 void pshader_endloop(void) {
535 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
539 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
543 void pshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
547 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
551 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
555 void pshader_rep(WINED3DSHADERVECTOR* d) {
559 void pshader_endrep(void) {
563 void pshader_if(WINED3DSHADERVECTOR* d) {
567 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
571 void pshader_else(void) {
575 void pshader_label(WINED3DSHADERVECTOR* d) {
579 void pshader_endif(void) {
583 void pshader_break(void) {
587 void pshader_breakc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
591 void pshader_breakp(WINED3DSHADERVECTOR* d) {
595 void pshader_mova(WINED3DSHADERVECTOR* d) {
599 void pshader_defb(WINED3DSHADERVECTOR* d) {
603 void pshader_defi(WINED3DSHADERVECTOR* d) {
607 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
611 void pshader_dsx(WINED3DSHADERVECTOR* d) {
615 void pshader_dsy(WINED3DSHADERVECTOR* d) {
619 void pshader_texldd(WINED3DSHADERVECTOR* d) {
623 void pshader_setp(WINED3DSHADERVECTOR* d) {
627 void pshader_texldl(WINED3DSHADERVECTOR* d) {
632 * log, exp, frc, m*x* seems to be macros ins ... to see
634 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = {
635 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
636 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
637 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
638 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
639 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
640 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
641 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
642 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
643 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
644 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
645 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
646 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
647 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
648 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
649 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
650 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
651 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
652 {D3DSIO_LIT, "lit", "LIT", 2, pshader_lit, 0, 0},
653 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
654 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
655 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
656 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
657 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
658 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
659 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
660 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
663 /** FIXME: use direct access so add the others opcodes as stubs */
664 /* DCL is a specil operation */
665 {D3DSIO_DCL, "dcl", NULL, 1, pshader_dcl, 0, 0},
666 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
667 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
668 /* TODO: sng can possibly be performed as
671 {D3DSIO_SGN, "sng", NULL, 2, pshader_sng, 0, 0},
672 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
675 MUL vec.xyz, vec, tmp;
676 but I think this is better because it accounts for w properly.
682 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
683 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
685 /* Flow control - requires GLSL or software shaders */
686 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 1, pshader_rep, 0, 0},
687 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
688 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 1, pshader_if, 0, 0},
689 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
690 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 0, pshader_else, 0, 0},
691 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 0, pshader_endif, 0, 0},
692 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 0, pshader_break, 0, 0},
693 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
694 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 1, pshader_breakp, 0, 0},
695 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
696 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
697 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
698 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
699 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
700 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
702 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, pshader_mova, 0, 0},
703 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
704 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
706 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
707 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
708 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
709 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
710 {D3DSIO_TEX, "texld", "undefined", 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
711 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 3, pshader_texld, D3DPS_VERSION(2,0), -1},
712 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
715 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
716 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
717 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
718 {D3DSIO_TEXM3x3PAD, "texm3x3pad", "undefined", 2, pshader_texm3x3pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
720 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
721 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
722 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
723 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
724 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
725 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
726 /* def is a special operation */
727 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
728 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
729 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
730 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
731 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
732 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
733 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
734 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
735 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
736 /* TODO: dp2add can be made out of multiple instuctions */
737 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
738 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
739 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
740 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
741 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
742 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
743 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
744 {0, NULL, NULL, 0, NULL, 0, 0}
/* Looks up the opcode table entry for an instruction token.
 *
 * This: shader whose baseShader.version and baseShader.shader_ins are used.
 * code: instruction token; only the bits in D3DSI_OPCODE_MASK are compared.
 *
 * The table is scanned linearly until an entry with a NULL name is reached.
 * An entry matches when its opcode equals the masked token and either the
 * shader version lies within [min_version, max_version] or both bounds are 0.
 * The first matching entry is returned; if none matches a FIXME is logged.
 *
 * NOTE(review): the declaration/initialisation of i, its increment and the
 * return after the FIXME are not visible in this excerpt - presumably the
 * function returns NULL for an unsupported opcode; confirm. */
748 inline static const SHADER_OPCODE* pshader_program_get_opcode(IWineD3DPixelShaderImpl *This, const DWORD code) {
/* version is stored as major * 10 + minor; convert it to the D3DPS_VERSION
 * encoding used by the table bounds. */
750 DWORD version = This->baseShader.version;
751 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
752 const SHADER_OPCODE *shader_ins = This->baseShader.shader_ins;
754 /** TODO: use dichotomic search */
755 while (NULL != shader_ins[i].name) {
756 if (((code & D3DSI_OPCODE_MASK) == shader_ins[i].opcode) &&
757 (((hex_version >= shader_ins[i].min_version) && (hex_version <= shader_ins[i].max_version)) ||
758 ((shader_ins[i].min_version == 0) && (shader_ins[i].max_version == 0)))) {
759 return &shader_ins[i];
763 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code, code, code & D3DSI_OPCODE_MASK, version);
767 inline static BOOL pshader_is_version_token(DWORD token) {
768 return 0xFFFF0000 == (token & 0xFFFF0000);
771 inline static BOOL pshader_is_comment_token(DWORD token) {
772 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
/* Formats the GL program name of the register encoded in a parameter token.
 *
 * param:     parameter token; the register number is param & REGMASK and the
 *            register type is taken from the D3DSP_REGTYPE_MASK bits.
 * regstr:    output buffer for the name. It is written with sprintf/strcpy and
 *            no size is passed, so the caller must supply a large enough buffer.
 * constants: per-constant flags.
 *
 * NOTE(review): most case labels and the conditions that choose between the
 * alternative formats (e.g. "C%lu" versus "program.env[%lu]", presumably
 * driven by constants[]) are not visible in this excerpt. */
776 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
777 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
779 DWORD reg = param & REGMASK;
780 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
784 sprintf(regstr, "R%lu", reg);
788 strcpy(regstr, "fragment.color.primary");
790 strcpy(regstr, "fragment.color.secondary");
795 sprintf(regstr, "C%lu", reg);
797 sprintf(regstr, "program.env[%lu]", reg);
799 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
800 sprintf(regstr,"T%lu", reg);
/* NOTE(review): reg indexes a five-entry table and no range check is visible
 * here - confirm reg cannot exceed 4 on this path. */
803 sprintf(regstr, "%s", rastout_reg_names[reg]);
806 sprintf(regstr, "oD[%lu]", reg);
808 case D3DSPR_TEXCRDOUT:
809 sprintf(regstr, "oT[%lu]", reg);
812 FIXME("Unhandled register name Type(%ld)\n", regtype);
/* Appends the write-mask suffix of a destination token to write_mask.
 *
 * Nothing is appended when all D3DSP_WRITEMASK_ALL bits are set. Otherwise a
 * "." is appended, followed by "r", "g", "b" and/or "a" for the mask bits
 * D3DSP_WRITEMASK_0..3 that are set, in that order.
 *
 * NOTE(review): strcat needs write_mask to hold a terminated string on entry;
 * no initialisation is visible in this excerpt - confirm where it is cleared. */
817 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
819 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
820 strcat(write_mask, ".");
821 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
822 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
823 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
824 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
/* Formats the source swizzle of a parameter token in rgba notation.
 *
 * instr:     parameter token; the swizzle is taken from D3DSP_SWIZZLE_MASK.
 * swzstring: output buffer; receives ".c" when all four selectors are equal
 *            and ".cccc" otherwise (at most 6 bytes including terminator).
 *
 * Each selector is a 2-bit index into "rgba": x in bits 0-1, y in bits 2-3,
 * z in bits 4-5 and w in bits 6-7 of the shifted swizzle value. Nothing is
 * formatted when the swizzle equals D3DSP_NOSWIZZLE.
 *
 * NOTE(review): whether swzstring is cleared for the no-swizzle case is not
 * visible in this excerpt - confirm. */
828 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
829 static const char swizzle_reg_chars[] = "rgba";
830 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
831 DWORD swizzle_x = swizzle & 0x03;
832 DWORD swizzle_y = (swizzle >> 2) & 0x03;
833 DWORD swizzle_z = (swizzle >> 4) & 0x03;
834 DWORD swizzle_w = (swizzle >> 6) & 0x03;
836 * swizzle bits fields:
840 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
841 if (swizzle_x == swizzle_y &&
842 swizzle_x == swizzle_z &&
843 swizzle_x == swizzle_w) {
844 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
846 sprintf(swzstring, ".%c%c%c%c",
847 swizzle_reg_chars[swizzle_x],
848 swizzle_reg_chars[swizzle_y],
849 swizzle_reg_chars[swizzle_z],
850 swizzle_reg_chars[swizzle_w]);
/* Appends one line of program text to the program buffer.
 *
 * lineNum:   line counter, reported in the trace.
 * pgm:       program buffer; PGMSIZE is the capacity used by the size check.
 * pgmLength: in/out number of bytes already used in pgm.
 * line:      NUL-terminated text to append.
 *
 * An ERR is logged when the line would not fit into PGMSIZE - 1 bytes.
 * Otherwise strlen(line) bytes are copied (without the terminator) to
 * pgm + *pgmLength and *pgmLength grows by that amount.
 *
 * NOTE(review): the statements after the ERR, any separator/terminator
 * written after the copy and the update of *lineNum are not visible in this
 * excerpt. The ERR text says "vertex program" although this helper is part of
 * the pixel shader code. */
855 inline static void addline(unsigned int *lineNum, char *pgm, unsigned int *pgmLength, char *line) {
856 int lineLen = strlen(line);
857 if(lineLen + *pgmLength > PGMSIZE - 1 /* - 1 to allow a NULL at the end */) {
858 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, lineLen + *pgmLength);
861 memcpy(pgm + *pgmLength, line, lineLen);
864 *pgmLength += lineLen;
866 TRACE("GL HW (%u, %u) : %s", *lineNum, *pgmLength, line);
/* Multiplier operand for each of the 16 possible destination shift values.
 * Shifts 1..4 (x2..x16) select a component of "coefmul", shifts 12..15
 * (d16..d2) select a component of "coefdiv"; every other value maps to the
 * placeholder "dummy". Indexed by gen_output_modifier_line. */
static const char *shift_tab[16] = {
    /*  0: none */ "dummy",
    /*  1: x2   */ "coefmul.x",
    /*  2: x4   */ "coefmul.y",
    /*  3: x8   */ "coefmul.z",
    /*  4: x16  */ "coefmul.w",
    /*  5: x32  */ "dummy",
    /*  6: x64  */ "dummy",
    /*  7: x128 */ "dummy",
    /*  8: d256 */ "dummy",
    /*  9: d128 */ "dummy",
    /* 10: d64  */ "dummy",
    /* 11: d32  */ "dummy",
    /* 12: d16  */ "coefdiv.w",
    /* 13: d8   */ "coefdiv.z",
    /* 14: d4   */ "coefdiv.y",
    /* 15: d2   */ "coefdiv.x"
};
/* Format a MUL instruction that scales a destination register in place.
 *
 * saturate:   non-zero adds the _SAT suffix to the instruction.
 * write_mask: write mask text appended to the destination register name.
 * shift:      index into shift_tab selecting the multiplier operand; no
 *             range check is visible here.
 * regstr:     register name, used as both destination and first source.
 * line:       output buffer for the formatted instruction (no newline).
 */
888 inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
889 /* Generate a line that does the output modifier computation */
890 sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
/* Generate the instruction text that applies a source modifier.
 *
 * instr:     source parameter token; the modifier is selected by the bits
 *            in D3DSP_SRCMOD_MASK.
 * tmpreg:    index of the scratch register to use; the register name is T
 *            followed by the letter A + tmpreg (TA, TB, ...).
 * outregstr: receives the operand name the caller should use: either the
 *            (possibly negated) original register or the scratch register.
 * line:      receives the instruction text when a computation is needed.
 * constants: passed through to get_register_name().
 *
 * Call sites treat a non-zero return as a request to emit `line` with
 * addline(). NOTE(review): the return statements and most case labels are
 * not visible in this excerpt - confirm which modifiers return zero.
 * The scratch strings regstr and tmpline are static buffers.
 */
893 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
894 /* Generate a line that does the input modifier computation and return the input register to use */
895 static char regstr[256];
896 static char tmpline[256];
899 /* Assume a new line will be added */
902 /* Get register name */
903 get_register_name(instr, regstr, constants);
905 TRACE(" Register name %s\n", regstr);
906 switch (instr & D3DSP_SRCMOD_MASK) {
/* Register used unchanged. */
908 strcpy(outregstr, regstr);
/* Negation is expressed as a prefix on the operand, no extra instruction. */
912 sprintf(outregstr, "-%s", regstr);
/* reg - 0.5 (coefdiv.x is declared as 0.5 by the program generator). */
916 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
/* -reg + 0.5 */
918 case D3DSPSM_BIASNEG:
919 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
/* reg * 2 - 1 (coefmul.x is 2, one.x is 1). */
922 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
/* reg * -2 + 1 */
924 case D3DSPSM_SIGNNEG:
925 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
/* 1 - reg */
928 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
/* reg + reg */
931 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
/* -reg - reg */
934 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
/* Divide by the z component: reciprocal first, then multiply. Both
 * instructions are packed into one line buffer separated by a newline. */
937 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
938 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
939 strcat(line, "\n"); /* Hack */
940 strcat(line, tmpline);
/* Same as above, dividing by the w component. */
943 sprintf(line, "RCP T%c, %s.w;", 'A' + tmpreg, regstr);
944 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
945 strcat(line, "\n"); /* Hack */
946 strcat(line, tmpline);
949 strcpy(outregstr, regstr);
954 /* Substitute the register name */
955 sprintf(outregstr, "T%c", 'A' + tmpreg);
960 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/* Translate a D3D pixel shader token stream into ARB program text and load
 * it into OpenGL.
 *
 * iface:     the pixel shader object; its constants table is reset and
 *            filled in, and baseShader.prgId receives the GL program id
 *            (or -1 when glProgramStringARB reports GL_INVALID_OPERATION).
 * pFunction: token stream, walked until the D3DPS_END() token; may be NULL,
 *            in which case no instructions are translated.
 *
 * The text is assembled with addline() in a heap buffer of PGMSIZE bytes
 * that is released before returning.
 *
 * NOTE(review): several declarations, case labels and closing braces of
 * this function are not visible in this excerpt; the comments below only
 * describe the statements that are.
 */
961 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
962 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
963 const DWORD *pToken = pFunction;
964 const SHADER_OPCODE *curOpcode = NULL;
967 unsigned lineNum = 0; /* The line number of the generated program (for logging) */
968 char *pgmStr = NULL; /* A pointer to the program data generated by this function */
970 #if 0 /* TODO: loop register (just another address register ) */
971 BOOL hasLoops = FALSE;
974 BOOL saturate; /* clamp to 0.0 -> 1.0*/
975 int row = 0; /* not sure, something to do with macros? */
977 int version = 0; /* The version of the shader */
979 /* Keep a running length for pgmStr so that we don't have to calculate strlen every time we concatenate */
980 unsigned int pgmLength = 0;
982 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
983 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
984 if (This->device->fixupVertexBufferSize < PGMSIZE) {
985 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
986 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
987 This->fixupVertexBufferSize = PGMSIZE;
988 This->fixupVertexBuffer[0] = 0;
990 pgmStr = This->device->fixupVertexBuffer;
/* NOTE(review): the result of this HeapAlloc is not checked in the lines
 * visible here before addline() writes into the buffer. */
992 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, PGMSIZE); /* 64kb should be enough */
996 /* TODO: Think about using a first pass to work out what's required for the second pass. */
/* Reset the table that records which constant registers get a definition
 * from a D3DSIO_DEF instruction. */
997 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
998 This->constants[i] = 0;
1000 if (NULL != pToken) {
1001 while (D3DPS_END() != *pToken) {
1002 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1004 instructionSize = pToken & SIZEBITS >> 27;
/* Version token: emit the program header followed by the fixed set of
 * temporaries and helper constants used by the translated instructions. */
1007 if (pshader_is_version_token(*pToken)) { /** version */
1011 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1012 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1014 TRACE("found version token ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1016 /* Each release of pixel shaders has had different numbers of temp registers */
1022 case 14: numTemps=12;
1024 strcpy(tmpLine, "!!ARBfp1.0\n");
1026 case 20: numTemps=12;
1028 strcpy(tmpLine, "!!ARBfp2.0\n");
1029 FIXME("No work done yet to support ps2.0 in hw\n");
1031 case 30: numTemps=32;
1033 strcpy(tmpLine, "!!ARBfp3.0\n");
1034 FIXME("No work done yet to support ps3.0 in hw\n");
1039 strcpy(tmpLine, "!!ARBfp1.0\n");
1040 FIXME("Unrecognized pixel shader version!\n");
1042 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1044 /* TODO: find out how many registers are really needed */
/* Declare T0..T5 and R0..R5 regardless of numTemps. */
1045 for(i = 0; i < 6; i++) {
1046 sprintf(tmpLine, "TEMP T%lu;\n", i);
1047 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1050 for(i = 0; i < 6; i++) {
1051 sprintf(tmpLine, "TEMP R%lu;\n", i);
1052 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Scratch registers: TMP and TMP2 for the texture instructions, TA..TC for
 * the results of gen_input_modifier_line(). */
1055 sprintf(tmpLine, "TEMP TMP;\n");
1056 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1057 sprintf(tmpLine, "TEMP TMP2;\n");
1058 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1059 sprintf(tmpLine, "TEMP TA;\n");
1060 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1061 sprintf(tmpLine, "TEMP TB;\n");
1062 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1063 sprintf(tmpLine, "TEMP TC;\n");
1064 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Helper constants referenced by shift_tab and the input modifier code. */
1066 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1067 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1068 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1069 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1070 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1071 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Preload T0..T3 with the matching texture coordinate sets. */
1073 for(i = 0; i < 4; i++) {
1074 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1075 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Comment token: trace it and step over its payload. */
1082 if (pshader_is_comment_token(*pToken)) { /** comment */
1083 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1085 TRACE("#%s\n", (char*)pToken);
1086 pToken += comment_len;
1090 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1094 curOpcode = pshader_program_get_opcode(This, *pToken);
1096 if (NULL == curOpcode) {
1097 /* unknown current opcode ... (shouldn't be any!) */
1098 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1099 FIXME("unrecognized opcode: %08lx\n", *pToken);
1102 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1103 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1104 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1105 pToken += curOpcode->num_params;
1107 } else if (D3DSIO_DEF == curOpcode->opcode) {
1109 /* Handle definitions here, they don't fit well with the
1110 * other instructions below [for now ] */
1112 DWORD reg = *pToken & REGMASK;
1114 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1115 curOpcode->name, curOpcode->glname, curOpcode->num_params);
/* The four tokens after the register token are reinterpreted as floats and
 * emitted as a PARAM declaration named C followed by the register number. */
1117 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1118 *((const float *)(pToken + 1)),
1119 *((const float *)(pToken + 2)),
1120 *((const float *)(pToken + 3)),
1121 *((const float *)(pToken + 4)) );
1123 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* NOTE(review): reg is masked with REGMASK only; no check against
 * WINED3D_PSHADER_MAX_CONSTANTS is visible before this store. */
1125 This->constants[reg] = 1;
1131 /* Common processing: [inst] [dst] [src]* */
1134 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1135 curOpcode->name, curOpcode->glname, curOpcode->num_params);
1139 /* Build opcode for GL vertex_program */
1140 switch (curOpcode->opcode) {
1167 case D3DSIO_TEXKILL:
1168 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1169 strcpy(tmpLine, curOpcode->glname);
/* Texture load: versions other than 1.4 sample into the T register with the
 * same index; version 1.4 takes a separate source operand and writes an R
 * register. */
1174 get_write_mask(*pToken, tmp);
1175 if (version != 14) {
1176 DWORD reg = *pToken & REGMASK;
1177 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg, tmp, reg, reg);
1178 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1182 DWORD reg1 = *pToken & REGMASK;
1183 if (gen_input_modifier_line(*++pToken, 0, reg2, tmpLine, This->constants)) {
1184 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1186 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1, tmp, reg2, reg1);
1187 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1193 case D3DSIO_TEXCOORD:
1196 get_write_mask(*pToken, tmp);
1197 if (version != 14) {
1198 DWORD reg = *pToken & REGMASK;
1199 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1200 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1203 DWORD reg1 = *pToken & REGMASK;
1204 DWORD reg2 = *++pToken & REGMASK;
1205 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1206 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* The matrix instructions accumulate one dot product per instruction into a
 * component of TMP; the final instruction of the group samples with TMP. */
1212 case D3DSIO_TEXM3x2PAD:
1214 DWORD reg = *pToken & REGMASK;
1216 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1217 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1219 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1220 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1225 case D3DSIO_TEXM3x2TEX:
1227 DWORD reg = *pToken & REGMASK;
1229 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1230 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1232 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1233 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1234 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1235 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Use the alpha and red components of the source as the 2D lookup
 * coordinates. */
1240 case D3DSIO_TEXREG2AR:
1242 DWORD reg1 = *pToken & REGMASK;
1243 DWORD reg2 = *++pToken & REGMASK;
1244 sprintf(tmpLine, "MOV TMP.r, T%lu.a;\n", reg2);
1245 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1246 sprintf(tmpLine, "MOV TMP.g, T%lu.r;\n", reg2);
1247 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1248 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1249 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Same as above with the green and blue components. */
1254 case D3DSIO_TEXREG2GB:
1256 DWORD reg1 = *pToken & REGMASK;
1257 DWORD reg2 = *++pToken & REGMASK;
1258 sprintf(tmpLine, "MOV TMP.r, T%lu.g;\n", reg2);
1259 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1260 sprintf(tmpLine, "MOV TMP.g, T%lu.b;\n", reg2);
1261 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1262 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1263 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1270 DWORD reg1 = *pToken & REGMASK;
1271 DWORD reg2 = *++pToken & REGMASK;
1273 /* FIXME: Should apply the BUMPMAPENV matrix */
1274 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1275 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1276 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1277 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* The destination component is chosen by row (x plus row). NOTE(review):
 * the update of row is not visible in this excerpt. */
1282 case D3DSIO_TEXM3x3PAD:
1284 DWORD reg = *pToken & REGMASK;
1286 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1287 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1289 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1290 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1296 case D3DSIO_TEXM3x3TEX:
1298 DWORD reg = *pToken & REGMASK;
1300 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1301 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1304 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1305 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1307 /* Cubemap textures will be more used than 3D ones. */
1308 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1309 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1314 case D3DSIO_TEXM3x3VSPEC:
1316 DWORD reg = *pToken & REGMASK;
1318 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1319 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1321 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1322 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1324 /* Construct the eye-ray vector from w coordinates */
/* NOTE(review): tcw is declared and filled outside the visible lines. */
1325 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1326 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1327 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1328 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1329 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1330 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1332 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1333 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;\n");
1334 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1335 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1336 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1337 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1338 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1340 /* Cubemap textures will be more used than 3D ones. */
1341 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1342 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Like the case above, but the eye vector comes from the constant register
 * named by the third parameter. NOTE(review): the emitted text indexes an
 * array C[n] while D3DSIO_DEF declares scalars named Cn - confirm that a C
 * array is declared elsewhere. */
1348 case D3DSIO_TEXM3x3SPEC:
1350 DWORD reg = *pToken & REGMASK;
1351 DWORD reg3 = *(pToken + 2) & REGMASK;
1353 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants)) {
1354 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1356 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1357 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1359 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1360 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1361 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1363 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1364 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1365 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1366 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1368 /* Cubemap textures will be more used than 3D ones. */
1369 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1370 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Opcodes with no translation: report them and skip their parameters. */
1378 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1379 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1381 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1383 pToken += curOpcode->num_params;
/* Destination modifier bits of the first parameter token. */
1387 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1388 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1390 case D3DSPDM_SATURATE: saturate = TRUE; break;
1391 #if 0 /* as yet unhandled modifiers */
1392 case D3DSPDM_CENTROID: centroid = TRUE; break;
1393 case D3DSPDM_PP: partialpresision = TRUE; break;
1394 case D3DSPDM_X2: X2 = TRUE; break;
1395 case D3DSPDM_X4: X4 = TRUE; break;
1396 case D3DSPDM_X8: X8 = TRUE; break;
1397 case D3DSPDM_D2: D2 = TRUE; break;
1398 case D3DSPDM_D4: D4 = TRUE; break;
1399 case D3DSPDM_D8: D8 = TRUE; break;
1402 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
1406 /* Generate input and output registers */
1407 if (curOpcode->num_params > 0) {
/* operands[0] is the destination, operands[1..3] the sources. */
1409 char operands[4][100];
1414 /* Generate lines that handle input modifier computation */
1415 for (i = 1; i < curOpcode->num_params; ++i) {
1416 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1417 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpOp, This->constants)) {
1418 addline(&lineNum, pgmStr, &pgmLength, tmpOp);
1422 /* Handle saturation only when no shift is present in the output modifier */
1423 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
1426 /* Handle output register */
1427 get_register_name(*pToken, tmpOp, This->constants);
1428 strcpy(operands[0], tmpOp);
1429 get_write_mask(*pToken, tmpOp);
1430 strcat(operands[0], tmpOp);
1432 /* This function works because of side effects from gen_input_modifier_line */
1433 /* Handle input registers */
1434 for (i = 1; i < curOpcode->num_params; ++i) {
1435 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1436 strcpy(operands[i], regs[i - 1]);
1437 get_input_register_swizzle(*(pToken + i), swzstring);
1438 strcat(operands[i], swzstring);
1441 switch(curOpcode->opcode) {
/* Note the operand order: the second and third sources are swapped when
 * emitting CMP here. */
1443 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
/* Compare against 0.5 by testing the sign of (0.5 - src0). NOTE(review):
 * the first emitted line has no trailing newline, unlike the others. */
1446 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
1447 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1448 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
/* Generic form: opcode name already in tmpLine, then the operand list. */
1452 strcat(tmpLine, "_SAT");
1453 strcat(tmpLine, " ");
1454 strcat(tmpLine, operands[0]);
1455 for (i = 1; i < curOpcode->num_params; i++) {
1456 strcat(tmpLine, ", ");
1457 strcat(tmpLine, operands[i]);
1459 strcat(tmpLine,";\n");
1461 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1462 pToken += curOpcode->num_params;
1464 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1465 if (curOpcode->num_params > 0) {
1466 DWORD param = *(pInstr + 1);
1467 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1469 /* Generate a line that handle the output modifier computation */
1471 char write_mask[20];
1472 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1473 get_register_name(param, regstr, This->constants);
1474 get_write_mask(param, write_mask);
1475 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
1476 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1482 /* TODO: What about result.depth? */
/* The shader result is whatever the program left in R0. */
1483 strcpy(tmpLine, "MOV result.color, R0;\n");
1484 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1486 strcpy(tmpLine, "END\n");
1487 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1490 /* finally null terminate the pgmStr*/
1491 pgmStr[pgmLength] = 0;
/* NOTE(review): the capability tested is ARB_VERTEX_PROGRAM although the
 * program is bound and loaded as GL_FRAGMENT_PROGRAM_ARB - confirm this is
 * the intended check. */
1492 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1493 /* Create the hw shader */
1495 /* pgmStr sometimes gets too long for a normal TRACE */
1496 TRACE("Generated program:\n");
1497 if (TRACE_ON(d3d_shader)) {
1498 fprintf(stderr, "%s\n", pgmStr);
1501 /* TODO: change to resource.glObjectHandel or something like that */
1502 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
1504 TRACE("Creating a hw pixel shader, prg=%d\n", This->baseShader.prgId);
1505 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->baseShader.prgId));
1507 TRACE("Created hw pixel shader, prg=%d\n", This->baseShader.prgId);
1508 /* Create the program and check for errors */
1509 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
/* On a rejected program, report the error position and message and mark
 * the shader as having no usable GL program. */
1510 if (glGetError() == GL_INVALID_OPERATION) {
1512 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1513 FIXME("HW PixelShader Error at position %d: %s\n",
1514 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1515 This->baseShader.prgId = -1;
1518 #if 1 /* if were using the data buffer of device then we don't need to free it */
1519 HeapFree(GetProcessHeap(), 0, pgmStr);
/* Trace one shader parameter token in assembly-like notation.
 *
 * param: the parameter token; register number, register type, source
 *        modifier, write mask and swizzle are all decoded from it.
 * input: the caller in this file passes 0 for the first parameter of an
 *        instruction and 1 for the following ones. NOTE(review): the branch
 *        on this flag is not visible in this excerpt; presumably 0 selects
 *        the write-mask output and non-zero the modifier and swizzle output.
 *
 * NOTE(review): rastout_reg_names has five entries and is indexed by the
 * register number without a visible range check.
 */
1523 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1524 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1525 static const char swizzle_reg_chars[] = "rgba";
1527 /* the unknown mask is for bits not yet accounted for by any other mask... */
1528 #define UNKNOWN_MASK 0xC000
1530 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
1531 #define EXTENDED_REG 0x1800
1533 DWORD reg = param & D3DSP_REGNUM_MASK;
/* The register type combines the regular type field with the two
 * EXTENDED_REG bits shifted down by 8. */
1534 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
/* Source modifiers that negate or complement the operand are tested first;
 * the statements they guard are not visible in this excerpt. */
1537 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1538 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1539 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1540 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1542 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
/* Register name: a type-specific prefix followed by the register number. */
1546 switch (regtype /* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1554 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1557 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1560 case D3DSPR_RASTOUT:
1561 TRACE("%s", rastout_reg_names[reg]);
1563 case D3DSPR_ATTROUT:
1564 TRACE("oD%lu", reg);
1566 case D3DSPR_TEXCRDOUT:
1567 TRACE("oT%lu", reg);
1569 case D3DSPR_CONSTINT:
1570 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1572 case D3DSPR_CONSTBOOL:
1573 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1579 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1586 /** operand output */
1588 * for better debugging traces it's done into opcode dump code
1589 * @see pshader_program_dump_opcode
1590 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1591 DWORD mask = param & D3DSP_DSTMOD_MASK;
1593 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1595 TRACE("_unhandled_modifier(0x%08lx)", mask);
1598 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1599 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1601 TRACE("_x%u", 1 << shift);
1605 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1607 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1608 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1609 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1610 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1613 /** operand input */
1614 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1615 DWORD swizzle_r = swizzle & 0x03;
1616 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1617 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1618 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1620 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1621 DWORD mask = param & D3DSP_SRCMOD_MASK;
1622 /*TRACE("_modifier(0x%08lx) ", mask);*/
1624 case D3DSPSM_NONE: break;
1625 case D3DSPSM_NEG: break;
1626 case D3DSPSM_BIAS: TRACE("_bias"); break;
1627 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1628 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1629 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1630 case D3DSPSM_COMP: break;
1631 case D3DSPSM_X2: TRACE("_x2"); break;
1632 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1633 case D3DSPSM_DZ: TRACE("_dz"); break;
1634 case D3DSPSM_DW: TRACE("_dw"); break;
1636 TRACE("_unknown(0x%08lx)", mask);
1641 * swizzle bits fields:
1644 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1645 if (swizzle_r == swizzle_g &&
1646 swizzle_r == swizzle_b &&
1647 swizzle_r == swizzle_a) {
1648 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1651 swizzle_reg_chars[swizzle_r],
1652 swizzle_reg_chars[swizzle_g],
1653 swizzle_reg_chars[swizzle_b],
1654 swizzle_reg_chars[swizzle_a]);
/* Trace the usage named by a declaration token.
 *
 * This:  the pixel shader; not referenced by any line visible here.
 * token: declaration token; the low 16 bits select the usage and, for the
 *        usages that print a number, bits 16..19 supply the usage index.
 *
 * Unknown usage values are reported with FIXME.
 */
1660 inline static void pshader_program_dump_decl_usage(IWineD3DPixelShaderImpl *This, DWORD token) {
1662 switch(token & 0xFFFF) {
1663 case D3DDECLUSAGE_POSITION:
1664 TRACE("%s%ld ", "position",(token & 0xF0000) >> 16);
1666 case D3DDECLUSAGE_BLENDINDICES:
1667 TRACE("%s ", "blend");
1669 case D3DDECLUSAGE_BLENDWEIGHT:
1670 TRACE("%s ", "weight");
1672 case D3DDECLUSAGE_NORMAL:
1673 TRACE("%s%ld ", "normal",(token & 0xF0000) >> 16);
1675 case D3DDECLUSAGE_PSIZE:
1676 TRACE("%s ", "psize");
/* Usage index 0 is traced as color; higher indices are traced as specular
 * with the index reduced by one. */
1678 case D3DDECLUSAGE_COLOR:
1679 if((token & 0xF0000) >> 16 == 0) {
1680 TRACE("%s ", "color");
1682 TRACE("%s%ld ", "specular", ((token & 0xF0000) >> 16) - 1);
1685 case D3DDECLUSAGE_TEXCOORD:
1686 TRACE("%s%ld ", "texture", (token & 0xF0000) >> 16);
1688 case D3DDECLUSAGE_TANGENT:
1689 TRACE("%s ", "tangent");
1691 case D3DDECLUSAGE_BINORMAL:
1692 TRACE("%s ", "binormal");
1694 case D3DDECLUSAGE_TESSFACTOR:
1695 TRACE("%s ", "tessfactor");
1697 case D3DDECLUSAGE_POSITIONT:
1698 TRACE("%s%ld ", "positionT",(token & 0xF0000) >> 16);
1700 case D3DDECLUSAGE_FOG:
1701 TRACE("%s ", "fog");
1703 case D3DDECLUSAGE_DEPTH:
1704 TRACE("%s ", "depth");
1706 case D3DDECLUSAGE_SAMPLE:
1707 TRACE("%s ", "sample");
1710 FIXME("Unrecognised dcl %08lx", token & 0xFFFF);
/* Parse, trace and store the token stream of a pixel shader.
 *
 * iface:     the pixel shader object.
 * pFunction: token stream terminated by D3DPS_END(), or NULL.
 *
 * The stream is walked once to trace a disassembly and to measure its
 * length; baseShader.version and baseShader.functionLength are set from
 * that pass. When the hardware mode is selected the ARB program is
 * generated, and finally a private copy of the tokens is stored in
 * baseShader.function (NULL when pFunction is NULL).
 *
 * The trace at the end announces WINED3D_OK; the return statement itself
 * is not visible in this excerpt.
 */
1714 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1715 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1716 const DWORD* pToken = pFunction;
1717 const SHADER_OPCODE *curOpcode = NULL;
1720 TRACE("(%p) : Parsing programme\n", This);
1722 if (NULL != pToken) {
1723 while (D3DPS_END() != *pToken) {
/* Version token: stored as major * 10 + minor. */
1724 if (pshader_is_version_token(*pToken)) { /** version */
1725 This->baseShader.version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1726 TRACE("ps_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
/* Comment token: step over its payload and count it in the length. */
1731 if (pshader_is_comment_token(*pToken)) { /** comment */
1732 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1734 TRACE("//%s\n", (char*)pToken);
1735 pToken += comment_len;
1736 len += comment_len + 1;
1739 if (!This->baseShader.version) {
1740 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1742 curOpcode = pshader_program_get_opcode(This, *pToken);
1745 if (NULL == curOpcode) {
1747 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1748 while (*pToken & 0x80000000) {
1750 /* unknown current opcode ... */
1751 TRACE("unrecognized opcode: %08lx", *pToken);
/* Declarations, constant definitions and ordinary instructions are each
 * traced in their own format. */
1758 if (curOpcode->opcode == D3DSIO_DCL) {
1759 pshader_program_dump_decl_usage(This, *pToken);
1762 pshader_program_dump_ps_param(*pToken, 0);
1766 if (curOpcode->opcode == D3DSIO_DEF) {
1767 TRACE("def c%lu = ", *pToken & 0xFF);
1770 TRACE("%f ,", *(float *)pToken);
1773 TRACE("%f ,", *(float *)pToken);
1776 TRACE("%f ,", *(float *)pToken);
1779 TRACE("%f", *(float *)pToken);
1783 TRACE("%s ", curOpcode->name);
1784 if (curOpcode->num_params > 0) {
1785 pshader_program_dump_ps_param(*pToken, 0);
1788 for (i = 1; i < curOpcode->num_params; ++i) {
1790 pshader_program_dump_ps_param(*pToken, 1);
/* len counts tokens; one more token is added to the byte length here. */
1799 This->baseShader.functionLength = (len + 1) * sizeof(DWORD);
1801 This->baseShader.functionLength = 1; /* no Function defined use fixed function vertex processing */
1804 /* Generate HW shader if needed */
/* NOTE(review): the pixel shader path is gated on the vs_mode setting and
 * the VS_HW value - confirm that no pixel-shader-specific setting should
 * be consulted instead. */
1805 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1806 TRACE("(%p) : Generating hardware program\n", This);
1808 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1812 TRACE("(%p) : Copying the function\n", This);
1813 /* copy the function ... because it will certainly be released by application */
/* NOTE(review): the HeapAlloc result is passed to memcpy without a check
 * in the lines visible here. */
1814 if (NULL != pFunction) {
1815 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1816 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1818 This->baseShader.function = NULL;
1821 /* TODO: Some proper return values for failures */
1822 TRACE("(%p) : Returning WINED3D_OK\n", This);
/* Method table of the pixel shader object. The entries are listed in
 * interface order, grouped by the interface that introduces them, so the
 * order must match the IWineD3DPixelShaderVtbl declaration. */
1826 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1828 /*** IUnknown methods ***/
1829 IWineD3DPixelShaderImpl_QueryInterface,
1830 IWineD3DPixelShaderImpl_AddRef,
1831 IWineD3DPixelShaderImpl_Release,
1832 /*** IWineD3DBase methods ***/
1833 IWineD3DPixelShaderImpl_GetParent,
1834 /*** IWineD3DBaseShader methods ***/
1835 IWineD3DPixelShaderImpl_SetFunction,
1836 /*** IWineD3DPixelShader methods ***/
1837 IWineD3DPixelShaderImpl_GetDevice,
1838 IWineD3DPixelShaderImpl_GetFunction