2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
37 # define TRACE_VSVECTOR(name)
40 /* The maximum size of the program */
43 #define REGMASK 0x00001FFF
45 #define GLNAME_REQUIRE_GLSL ((const char *)1)
46 /* *******************************************
47 IWineD3DPixelShader IUnknown parts follow
48 ******************************************* */
/* COM QueryInterface for the pixel shader object.
 *  iface - the pixel shader being queried
 *  riid  - identifier of the requested interface
 *  ppobj - out parameter for the interface pointer
 * The visible condition accepts IID_IUnknown, IID_IWineD3DBase,
 * IID_IWineD3DBaseShader and IID_IWineD3DPixelShader, and takes a reference
 * on iface for those.
 * NOTE(review): the store into *ppobj and both return statements are not
 * visible in this copy of the file - confirm against the full source. */
49 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
51 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
52 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
/* Any of the four supported interface ids resolves to this same object. */
53 if (IsEqualGUID(riid, &IID_IUnknown)
54 || IsEqualGUID(riid, &IID_IWineD3DBase)
55 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
56 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
/* The caller receives its own reference. */
57 IUnknown_AddRef(iface);
64 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
65 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
66 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
67 return InterlockedIncrement(&This->ref);
/* Drops one reference on the pixel shader.
 * The counter is decremented with InterlockedDecrement and the object memory
 * is handed back to the process heap with HeapFree.
 * NOTE(review): the declaration of ref, the condition guarding HeapFree
 * (presumably ref == 0) and the return statement are not visible in this
 * copy of the file - confirm against the full source. */
70 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
71 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
/* The trace reports the count as it was before the decrement. */
73 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
74 ref = InterlockedDecrement(&This->ref);
76 HeapFree(GetProcessHeap(), 0, This);
81 /* TODO: At the moment the function parser is single pass; it achieves this
82 by passing constants to a couple of functions where they are then modified.
83 At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
84 when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
87 /* *******************************************
88 IWineD3DPixelShader IWineD3DPixelShader parts follow
89 ******************************************* */
/* Returns the parent object stored on the shader.
 *  parent - out parameter; receives This->parent with an extra reference
 *           taken on behalf of the caller.
 * NOTE(review): the return statement is not visible in this copy of the file. */
91 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
92 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
94 *parent = This->parent;
/* The caller owns the reference added here. */
95 IUnknown_AddRef(*parent);
96 TRACE("(%p) : returning %p\n", This, *parent);
/* Returns the device that the shader records in This->wineD3DDevice.
 *  pDevice - out parameter; receives the device pointer. A reference is
 *            taken on the device before it is handed out.
 * NOTE(review): the return statement is not visible in this copy of the file. */
100 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
101 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
102 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
103 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
104 TRACE("(%p) returning %p\n", This, *pDevice);
/* Copies the shader's stored function (token stream) to the caller.
 *  pData       - destination buffer
 *  pSizeOfData - in: size of pData; out: This->baseShader.functionLength
 *                on the paths shown below
 * Returns D3DERR_MOREDATA when the supplied size is smaller than the stored
 * function length.
 * NOTE(review): several guard lines and return statements are not visible in
 * this copy of the file (for instance the condition under which the first
 * size store happens) - confirm against the full source. */
109 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
110 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
111 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
/* Report the required size to the caller. */
114 *pSizeOfData = This->baseShader.functionLength;
/* Buffer too small: report the required size and fail. */
117 if (*pSizeOfData < This->baseShader.functionLength) {
118 *pSizeOfData = This->baseShader.functionLength;
119 return D3DERR_MOREDATA;
121 if (NULL == This->baseShader.function) { /* no function defined */
122 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
/* Writes a pointer-sized NULL into the caller's buffer.
 * NOTE(review): no visible check that the buffer is at least pointer-sized. */
123 (*(DWORD **) pData) = NULL;
/* NOTE(review): the body of this zero-length check is not visible here. */
125 if (This->baseShader.functionLength == 0) {
128 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
129 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
134 /*******************************
135 * pshader functions software VM
138 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
139 d->x = s0->x + s1->x;
140 d->y = s0->y + s1->y;
141 d->z = s0->z + s1->z;
142 d->w = s0->w + s1->w;
143 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
144 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
147 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
148 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
149 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
150 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
153 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
154 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
155 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
156 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM "dst" handler: d is derived from s0 and s1.
 * Only the y component (s0.y * s1.y) is visible in this copy of the file.
 * NOTE(review): the assignments to d->x, d->z and d->w are not shown here -
 * confirm against the full source. */
159 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
161 d->y = s0->y * s1->y;
164 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
165 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM "expp" handler: partial-precision 2^w, driven by s0->w.
 * NOTE(review): the declaration of tmp (it is accessed as tmp.f and tmp.d,
 * so presumably a float/integer union) and the assignments to d->z and d->w
 * are not visible in this copy of the file. */
168 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
/* x = 2 raised to the integer part (floor) of w */
174 tmp.f = floorf(s0->w);
175 d->x = powf(2.0f, tmp.f);
/* y = fractional part of w */
176 d->y = s0->w - tmp.f;
/* Full 2^w, then the low 8 bits of tmp.d are cleared. */
177 tmp.f = powf(2.0f, s0->w);
178 tmp.d &= 0xFFFFFF00U;
/* Note: the trace text says "exp" although this is the expp handler. */
181 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
182 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM "lit" handler: lighting coefficients derived from s0.
 * NOTE(review): the assignments to d->x and d->w are not visible in this
 * copy of the file. */
185 void pshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
/* y = s0.x clamped below at zero */
187 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
/* z = s0.y raised to the power s0.w, but only when both s0.x and s0.y are positive */
188 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
190 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
191 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
194 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
195 float tmp_f = fabsf(s0->w);
196 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
197 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
198 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
201 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
202 d->x = s0->x * s1->x + s2->x;
203 d->y = s0->y * s1->y + s2->y;
204 d->z = s0->z * s1->z + s2->z;
205 d->w = s0->w * s1->w + s2->w;
206 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
207 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
210 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
211 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
212 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
213 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
214 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
215 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
216 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
219 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
220 d->x = (s0->x < s1->x) ? s0->x : s1->x;
221 d->y = (s0->y < s1->y) ? s0->y : s1->y;
222 d->z = (s0->z < s1->z) ? s0->z : s1->z;
223 d->w = (s0->w < s1->w) ? s0->w : s1->w;
224 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
225 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM "mov" handler taking a destination d and a source s0.
 * NOTE(review): the component assignments are not visible in this copy of
 * the file; only the signature and the trace remain. */
228 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
233 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
234 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
237 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
238 d->x = s0->x * s1->x;
239 d->y = s0->y * s1->y;
240 d->z = s0->z * s1->z;
241 d->w = s0->w * s1->w;
242 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
243 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM "nop": performs no work beyond the optional trace. */
void pshader_nop(void) {
    /* Nothing to compute for a no-op. */
    PSTRACE(("executing nop\n"));
}
251 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
252 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
253 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
254 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
257 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
258 float tmp_f = fabsf(s0->w);
259 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
260 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
261 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
264 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
265 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
266 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
267 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
268 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
269 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
270 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
273 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
274 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
275 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
276 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
277 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
278 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
279 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
282 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
283 d->x = s0->x - s1->x;
284 d->y = s0->y - s1->y;
285 d->z = s0->z - s1->z;
286 d->w = s0->w - s1->w;
287 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
288 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
292 * Version 1.1 specific
295 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
296 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
297 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
298 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
301 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
302 float tmp_f = fabsf(s0->w);
303 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
304 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
305 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Software VM "frc" handler: fractional part of s0 (value minus its floor).
 * NOTE(review): only the x and y assignments are visible in this copy of the
 * file; whatever is done to d->z and d->w is not shown here. */
308 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
309 d->x = s0->x - floorf(s0->x);
310 d->y = s0->y - floorf(s0->y);
313 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
314 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Fixed-size FLOAT matrix types used as the matrix operand of the m*x*
 * handlers in this file. The first array dimension is indexed first, i.e.
 * mat[i][j] with i below the first bound and j below the second. */
/* 4 x 4 */
317 typedef FLOAT D3DMATRIX44[4][4];
/* 4 x 3 */
318 typedef FLOAT D3DMATRIX43[4][3];
/* 3 x 4 */
319 typedef FLOAT D3DMATRIX34[3][4];
/* 3 x 3 */
320 typedef FLOAT D3DMATRIX33[3][3];
/* 2 x 3 */
321 typedef FLOAT D3DMATRIX23[2][3];
323 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
325 * Buggy CODE: here only if cast not work for copy/paste
326 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
327 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
328 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
329 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
330 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
331 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
332 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
334 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
335 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
336 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
337 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
338 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
339 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
340 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
341 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
/* Software VM "m4x3" handler: x, y and z of d are the four-component dot
 * products of rows 0..2 of mat (a D3DMATRIX34, i.e. FLOAT[3][4]) with s0.
 * NOTE(review): the assignment to d->w is not visible in this copy of the
 * file, although the last trace prints d->w.
 * NOTE(review): d->x is written before s0->x is last read, so the result
 * looks wrong if d and s0 are the same vector - confirm whether callers can
 * pass aliasing operands. */
344 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
345 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
346 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
347 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
349 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
350 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
351 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
352 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
355 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
356 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
357 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
358 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
359 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
360 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
361 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
362 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
363 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
/* Software VM "m3x3" handler: x, y and z of d are the three-component dot
 * products of rows 0..2 of mat (a D3DMATRIX33) with s0.
 * NOTE(review): the assignment to d->w is not visible in this copy of the
 * file, although the last trace prints d->w.
 * NOTE(review): d->x is written before s0->x is last read, so the result
 * looks wrong if d and s0 are the same vector - confirm whether callers can
 * pass aliasing operands. */
366 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
367 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
368 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
369 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
371 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
372 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
373 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
374 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
/* Software VM "m3x2" handler: x and y of d are the three-component dot
 * products of rows 0 and 1 of mat (a D3DMATRIX23) with s0.
 * NOTE(review): further statements of this function (anything done to d->z
 * and d->w, and any tracing) are not visible in this copy of the file. */
377 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
379 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
380 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
386 * Version 2.0 specific
388 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
389 d->x = s0->x * (s1->x - s2->x) + s2->x;
390 d->y = s0->y * (s1->y - s2->y) + s2->y;
391 d->z = s0->z * (s1->z - s2->z) + s2->z;
392 d->w = s0->w * (s1->w - s2->w) + s2->w;
395 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
396 d->x = s0->y * s1->z - s0->z * s1->y;
397 d->y = s0->z * s1->x - s0->x * s1->z;
398 d->z = s0->x * s1->y - s0->y * s1->x;
399 d->w = 0.9f; /* w is undefined, so set it to something safeish */
401 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
402 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM "abs" handler taking a destination d and a source s0.
 * NOTE(review): the component assignments are not visible in this copy of
 * the file; only the signature and the trace remain. */
405 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
410 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
411 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Handlers for the remaining instructions. Each of them is referenced from
 * the opcode table IWineD3DPixelShaderImpl_shader_ins further down.
 * NOTE(review): only the signatures are visible in this copy of the file;
 * the function bodies are not shown, so nothing is claimed here about what
 * they do. */

/* Texture addressing / sampling instructions. */
415 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
419 void pshader_texkill(WINED3DSHADERVECTOR* d) {
423 void pshader_tex(WINED3DSHADERVECTOR* d) {
426 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
430 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
434 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
438 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
442 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
446 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
450 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
454 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
458 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
462 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
466 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
470 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
474 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
478 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
479 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
483 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
487 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
491 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
495 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
499 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
503 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
507 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
511 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
/* Flow control, declarations and later-version arithmetic instructions.
 * Note: pshader_sng is the handler the opcode table uses for D3DSIO_SGN. */
515 void pshader_call(WINED3DSHADERVECTOR* d) {
519 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
523 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
527 void pshader_ret(WINED3DSHADERVECTOR* d) {
531 void pshader_endloop(WINED3DSHADERVECTOR* d) {
535 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
539 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
543 void pshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
547 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
551 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
555 void pshader_rep(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
559 void pshader_endrep(void) {
563 void pshader_if(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
567 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
571 void pshader_else(WINED3DSHADERVECTOR* d) {
575 void pshader_label(WINED3DSHADERVECTOR* d) {
579 void pshader_endif(WINED3DSHADERVECTOR* d) {
583 void pshader_break(WINED3DSHADERVECTOR* d) {
587 void pshader_breakc(WINED3DSHADERVECTOR* d) {
591 void pshader_mova(WINED3DSHADERVECTOR* d) {
595 void pshader_defb(WINED3DSHADERVECTOR* d) {
599 void pshader_defi(WINED3DSHADERVECTOR* d) {
603 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
607 void pshader_dsx(WINED3DSHADERVECTOR* d) {
611 void pshader_dsy(WINED3DSHADERVECTOR* d) {
615 void pshader_texldd(WINED3DSHADERVECTOR* d) {
619 void pshader_setp(WINED3DSHADERVECTOR* d) {
623 void pshader_texldl(WINED3DSHADERVECTOR* d) {
627 void pshader_breakp(WINED3DSHADERVECTOR* d) {
631 * log, exp, frc and m*x* seem to be macro instructions ... to be confirmed
633 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = {
634 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
635 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
636 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
637 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
638 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
639 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
640 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
641 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
642 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
643 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
644 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
645 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
646 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
647 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
648 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
649 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
650 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
651 {D3DSIO_LIT, "lit", "LIT", 2, pshader_lit, 0, 0},
652 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
653 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
654 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
655 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
656 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
657 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
658 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
659 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
662 /** FIXME: use direct access so add the others opcodes as stubs */
663 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
664 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl ol NV_shader's */
665 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
666 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
667 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
668 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
669 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
670 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
671 /* DCL is a specil operation */
672 {D3DSIO_DCL, "dcl", NULL, 1, pshader_dcl, 0, 0},
673 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
674 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
675 /* TODO: sng can possibly be performed as
678 {D3DSIO_SGN, "sng", NULL, 2, pshader_sng, 0, 0},
679 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
682 MUL vec.xyz, vec, tmp;
683 but I think this is better because it accounts for w properly.
689 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
690 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
691 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 2, pshader_rep, 0, 0},
692 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
693 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 2, pshader_if, 0, 0},
694 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
695 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 2, pshader_else, 0, 0},
696 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 2, pshader_endif, 0, 0},
697 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 2, pshader_break, 0, 0},
698 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
699 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, pshader_mova, 0, 0},
700 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
701 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
703 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
704 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
705 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
706 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
707 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
708 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 3, pshader_texld, D3DPS_VERSION(2,0), -1},
709 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
710 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
711 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
712 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
715 {D3DSIO_TEXM3x3PAD, "texm3x3pad", "undefined", 2, pshader_texm3x3pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
716 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
717 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
718 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
720 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
721 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
722 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
723 /* def is a special operation */
724 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
725 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
726 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
729 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
730 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
731 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
732 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
733 /* TODO: dp2add can be made out of multiple instuctions */
734 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
735 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
736 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
737 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
738 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
739 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
740 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 2, pshader_breakp, 0, 0},
741 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
742 {0, NULL, NULL, 0, NULL, 0, 0}
746 inline static const SHADER_OPCODE* pshader_program_get_opcode(IWineD3DPixelShaderImpl *This, const DWORD code) {
748 DWORD version = This->baseShader.version;
749 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
750 const SHADER_OPCODE *shader_ins = This->baseShader.shader_ins;
752 /** TODO: use dichotomic search */
753 while (NULL != shader_ins[i].name) {
754 if (((code & D3DSI_OPCODE_MASK) == shader_ins[i].opcode) &&
755 (((hex_version >= shader_ins[i].min_version) && (hex_version <= shader_ins[i].max_version)) ||
756 ((shader_ins[i].min_version == 0) && (shader_ins[i].max_version == 0)))) {
757 return &shader_ins[i];
761 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code, code, code & D3DSI_OPCODE_MASK, version);
765 inline static BOOL pshader_is_version_token(DWORD token) {
766 return 0xFFFF0000 == (token & 0xFFFF0000);
769 inline static BOOL pshader_is_comment_token(DWORD token) {
770 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
/* Writes the textual GL program name of the register encoded in a parameter
 * token into regstr.
 *  param     - parameter token; the register number is param & REGMASK and
 *              the register type comes from the D3DSP_REGTYPE_MASK bits
 *  regstr    - output buffer; written with sprintf/strcpy without a length
 *              limit, so it must be large enough for the longest name
 *  constants - per-constant flags (see the two constant forms below)
 * NOTE(review): the switch statement, most case labels, the conditions that
 * choose between alternatives and the break statements are not visible in
 * this copy of the file - the grouping comments below are inferred from the
 * strings only and need confirming against the full source. */
774 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
/* NOTE(review): indexed by reg below with no visible bounds check (5 entries). */
775 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
777 DWORD reg = param & REGMASK;
778 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
/* Temporary-style name "R<n>". */
782 sprintf(regstr, "R%lu", reg);
/* Primary / secondary fragment colour. */
786 strcpy(regstr, "fragment.color.primary");
788 strcpy(regstr, "fragment.color.secondary");
/* Constants: either a named "C<n>" or an environment parameter. */
793 sprintf(regstr, "C%lu", reg);
795 sprintf(regstr, "program.env[%lu]", reg);
797 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
798 sprintf(regstr,"T%lu", reg);
/* Output name looked up in rastout_reg_names. */
801 sprintf(regstr, "%s", rastout_reg_names[reg]);
804 sprintf(regstr, "oD[%lu]", reg);
806 case D3DSPR_TEXCRDOUT:
807 sprintf(regstr, "oT[%lu]", reg);
/* Anything else is reported and regstr is not written on this line. */
810 FIXME("Unhandled register name Type(%ld)\n", regtype);
/* Appends the write-mask suffix for a destination register to write_mask.
 *  output_reg - destination parameter token carrying D3DSP_WRITEMASK_* bits
 *  write_mask - output string; the suffix is appended with strcat, so it
 *               must already hold a terminated string with room for up to
 *               five more characters plus the terminator
 * When all four mask bits are set nothing is appended; otherwise "." is
 * appended followed by r, g, b and a for the bits that are set.
 * NOTE(review): a line between the signature and the test is not visible in
 * this copy of the file (presumably it resets write_mask to an empty
 * string) - confirm against the full source. */
815 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
817 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
818 strcat(write_mask, ".");
819 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
820 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
821 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
822 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
/* Writes the swizzle suffix of a source parameter token into swzstring.
 *  instr     - source parameter token; the swizzle is taken from the
 *              D3DSP_SWIZZLE_MASK bits
 *  swzstring - output buffer; receives ".c" when all four selectors are
 *              equal, or ".cccc" otherwise (c being one of r, g, b, a)
 * Nothing is written by the visible code when the swizzle equals
 * D3DSP_NOSWIZZLE.
 * NOTE(review): some lines are not visible in this copy of the file (the
 * else separating the two sprintf calls, and possibly an initial reset of
 * swzstring) - confirm against the full source. */
826 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
827 static const char swizzle_reg_chars[] = "rgba";
828 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
/* Two bits per destination component select the source component. */
829 DWORD swizzle_x = swizzle & 0x03;
830 DWORD swizzle_y = (swizzle >> 2) & 0x03;
831 DWORD swizzle_z = (swizzle >> 4) & 0x03;
832 DWORD swizzle_w = (swizzle >> 6) & 0x03;
834 * swizzle bits fields:
838 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
/* All four selectors identical: emit the short single-letter form. */
839 if (swizzle_x == swizzle_y &&
840 swizzle_x == swizzle_z &&
841 swizzle_x == swizzle_w) {
842 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
/* Otherwise spell out all four selectors. */
844 sprintf(swzstring, ".%c%c%c%c",
845 swizzle_reg_chars[swizzle_x],
846 swizzle_reg_chars[swizzle_y],
847 swizzle_reg_chars[swizzle_z],
848 swizzle_reg_chars[swizzle_w]);
/* Appends one line of generated program text to the program buffer.
 *  lineNum   - line counter, printed in the trace
 *  pgm       - program buffer of PGMSIZE bytes
 *  pgmLength - number of bytes already used in pgm; increased by the length
 *              of line
 *  line      - terminated string to append
 * If the line would not fit (leaving one byte for the terminator) an error
 * is logged.
 * NOTE(review): the lines following the error message (presumably an early
 * return), any update of *lineNum and any terminator handling are not
 * visible in this copy of the file - confirm against the full source.
 * NOTE(review): the error text mentions a "vertex program" although this is
 * the pixel shader file. */
853 inline static void addline(unsigned int *lineNum, char *pgm, unsigned int *pgmLength, char *line) {
854 int lineLen = strlen(line);
855 if(lineLen + *pgmLength > PGMSIZE - 1 /* - 1 to allow a NULL at the end */) {
856 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, lineLen + *pgmLength);
/* Copy the text without its terminator, directly after the existing content. */
859 memcpy(pgm + *pgmLength, line, lineLen);
862 *pgmLength += lineLen;
864 TRACE("GL HW (%u, %u) : %s", *lineNum, *pgmLength, line);
/* Maps a destination shift value (0..15) to the name of the program
 * parameter component that holds the matching scale factor. Shift values
 * without a factor map to "dummy". */
static const char* shift_tab[] = {
    [0]  = "dummy",     /* none */
    [1]  = "coefmul.x", /* x2   */
    [2]  = "coefmul.y", /* x4   */
    [3]  = "coefmul.z", /* x8   */
    [4]  = "coefmul.w", /* x16  */
    [5]  = "dummy",     /* x32  */
    [6]  = "dummy",     /* x64  */
    [7]  = "dummy",     /* x128 */
    [8]  = "dummy",     /* d256 */
    [9]  = "dummy",     /* d128 */
    [10] = "dummy",     /* d64  */
    [11] = "dummy",     /* d32  */
    [12] = "coefdiv.w", /* d16  */
    [13] = "coefdiv.z", /* d8   */
    [14] = "coefdiv.y", /* d4   */
    [15] = "coefdiv.x"  /* d2   */
};
886 inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
887 /* Generate a line that does the output modifier computation */
888 sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
891 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
892 /* Generate a line that does the input modifier computation and return the input register to use */
893 static char regstr[256];
894 static char tmpline[256];
897 /* Assume a new line will be added */
900 /* Get register name */
901 get_register_name(instr, regstr, constants);
903 TRACE(" Register name %s\n", regstr);
904 switch (instr & D3DSP_SRCMOD_MASK) {
906 strcpy(outregstr, regstr);
910 sprintf(outregstr, "-%s", regstr);
914 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
916 case D3DSPSM_BIASNEG:
917 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
920 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
922 case D3DSPSM_SIGNNEG:
923 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
926 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
929 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
932 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
935 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
936 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
937 strcat(line, "\n"); /* Hack */
938 strcat(line, tmpline);
941 sprintf(line, "RCP T%c, %s;", 'A' + tmpreg, regstr);
942 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
943 strcat(line, "\n"); /* Hack */
944 strcat(line, tmpline);
947 strcpy(outregstr, regstr);
952 /* Substitute the register name */
953 sprintf(outregstr, "T%c", 'A' + tmpreg);
958 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/**
 * Translate a D3D pixel shader token stream into an ARB fragment program
 * string and hand it to GL.
 *
 * What the code below does:
 *  - allocates a zero-filled PGMSIZE byte text buffer from the process heap
 *    and clears This->constants;
 *  - walks the tokens of pFunction until D3DPS_END();
 *  - on the version token, emits the program header, the TEMP and PARAM
 *    declarations and copies fragment.texcoord[0..3] into T0..T3;
 *  - skips comment tokens, and skips opcodes that are unknown or flagged
 *    GLNAME_REQUIRE_GLSL;
 *  - translates the texture and def opcodes case by case, and all other
 *    opcodes through the generic operand path near the end of the loop;
 *  - appends "MOV result.color, R0;" and "END", creates a GL program object,
 *    binds it to GL_FRAGMENT_PROGRAM_ARB and loads the string; on
 *    GL_INVALID_OPERATION the error is logged and baseShader.prgId is set to -1;
 *  - releases the text buffer.
 *
 * @param iface      the pixel shader (cast to IWineD3DPixelShaderImpl)
 * @param pFunction  shader token stream; nothing is walked when it is NULL
 */
959 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
960 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
961 const DWORD *pToken = pFunction;
962 const SHADER_OPCODE *curOpcode = NULL;
965 unsigned lineNum = 0; /* The line number of the generated program (for logging) */
966 char *pgmStr = NULL; /* A pointer to the program data generated by this function */
968 DWORD nUseAddressRegister = 0;
969 #if 0 /* TODO: loop register (just another address register ) */
970 BOOL hasLoops = FALSE;
973 BOOL saturate; /* clamp to 0.0 -> 1.0*/
974 int row = 0; /* not sure, something to do with macros? */
976 int version = 0; /* The version of the shader */
978 /* Keep a running length for pgmStr so that we don't have to calculate strlen every time we concatenate */
979 unsigned int pgmLength = 0;
981 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
982 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
983 if (This->device->fixupVertexBufferSize < PGMSIZE) {
984 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
985 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
986 This->fixupVertexBufferSize = PGMSIZE;
987 This->fixupVertexBuffer[0] = 0;
989 pgmStr = This->device->fixupVertexBuffer;
/* NOTE(review): no check of the HeapAlloc result is visible before pgmStr is written to - confirm. */
991 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, PGMSIZE); /* 64kb should be enough */
995 /* TODO: Think about using a first pass to work out what's required for the second pass. */
996 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
997 This->constants[i] = 0;
999 if (NULL != pToken) {
1000 while (D3DPS_END() != *pToken) {
1001 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1003 instructionSize = pToken & SIZEBITS >> 27;
1006 if (pshader_is_version_token(*pToken)) { /** version */
1010 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1011 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1013 TRACE("found version token ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1015 /* Each release of pixel shaders has had different numbers of temp registers */
1021 case 14: numTemps=12;
1023 strcpy(tmpLine, "!!ARBfp1.0\n");
1025 case 20: numTemps=12;
1027 strcpy(tmpLine, "!!ARBfp2.0\n");
1028 FIXME("No work done yet to support ps2.0 in hw\n");
1030 case 30: numTemps=32;
1032 strcpy(tmpLine, "!!ARBfp3.0\n");
1033 FIXME("No work done yet to support ps3.0 in hw\n");
1038 strcpy(tmpLine, "!!ARBfp1.0\n");
1039 FIXME("Unrecognized pixel shader version!\n");
1041 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1043 /* TODO: find out how many registers are really needed */
1044 for(i = 0; i < 6; i++) {
1045 sprintf(tmpLine, "TEMP T%lu;\n", i);
1046 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1049 for(i = 0; i < 6; i++) {
1050 sprintf(tmpLine, "TEMP R%lu;\n", i);
1051 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Scratch registers: TMP and TMP2 for the texture matrix opcodes, TA..TC for input modifiers. */
1054 sprintf(tmpLine, "TEMP TMP;\n");
1055 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1056 sprintf(tmpLine, "TEMP TMP2;\n");
1057 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1058 sprintf(tmpLine, "TEMP TA;\n");
1059 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1060 sprintf(tmpLine, "TEMP TB;\n");
1061 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1062 sprintf(tmpLine, "TEMP TC;\n");
1063 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Constant vectors referenced by the modifier and shift code. */
1065 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1066 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1067 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1068 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1069 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1070 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Preload the first four texture coordinate sets into T0..T3. */
1072 for(i = 0; i < 4; i++) {
1073 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1074 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1081 if (pshader_is_comment_token(*pToken)) { /** comment */
1082 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1084 FIXME("#%s\n", (char*)pToken);
1085 pToken += comment_len;
1089 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1093 curOpcode = pshader_program_get_opcode(This, *pToken);
1095 if (NULL == curOpcode) {
1096 /* unknown current opcode ... (shouldn't be any!) */
1097 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1098 FIXME("unrecognized opcode: %08lx\n", *pToken);
1101 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1102 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1103 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1104 pToken += curOpcode->num_params;
1106 TRACE("Found opcode %s %s\n", curOpcode->name, curOpcode->glname);
1109 /* Build opcode for GL vertex_program */
1110 switch (curOpcode->opcode) {
1115 /* Address registers must be loaded with the ARL instruction */
1116 if ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) == D3DSPR_ADDR) {
1117 if (((*pToken) & REGMASK) < nUseAddressRegister) {
1118 strcpy(tmpLine, "ARL");
1121 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This, ((*pToken) & REGMASK));
1146 case D3DSIO_TEXKILL:
1147 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1148 strcpy(tmpLine, curOpcode->glname);
1152 DWORD reg = *pToken & REGMASK;
/* Emit the constant as a named PARAM and record that this register is defined in the program. */
1153 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1154 *((const float *)(pToken + 1)),
1155 *((const float *)(pToken + 2)),
1156 *((const float *)(pToken + 3)),
1157 *((const float *)(pToken + 4)) );
1159 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1161 This->constants[reg] = 1;
1169 get_write_mask(*pToken, tmp);
1170 if (version != 14) {
1171 DWORD reg = *pToken & REGMASK;
1172 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg, tmp, reg, reg);
1173 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1177 DWORD reg1 = *pToken & REGMASK;
1178 DWORD reg2 = *++pToken & REGMASK;
1179 if (gen_input_modifier_line(*pToken, 0, reg, tmpLine, This->constants)) {
1180 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1182 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1, tmp, reg, reg2);
1183 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1189 case D3DSIO_TEXCOORD:
1192 get_write_mask(*pToken, tmp);
1193 if (version != 14) {
1194 DWORD reg = *pToken & REGMASK;
1195 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1196 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1199 DWORD reg1 = *pToken & REGMASK;
1200 DWORD reg2 = *++pToken & REGMASK;
1201 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1202 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1208 case D3DSIO_TEXM3x2PAD:
1210 DWORD reg = *pToken & REGMASK;
1212 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1213 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1215 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1216 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1221 case D3DSIO_TEXM3x2TEX:
1223 DWORD reg = *pToken & REGMASK;
1225 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1226 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1228 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1229 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1230 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1231 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1236 case D3DSIO_TEXREG2AR:
1238 DWORD reg1 = *pToken & REGMASK;
1239 DWORD reg2 = *++pToken & REGMASK;
1240 sprintf(tmpLine, "MOV TMP.r, T%lu.a;\n", reg2);
1241 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1242 sprintf(tmpLine, "MOV TMP.g, T%lu.r;\n", reg2);
1243 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1244 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1245 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1250 case D3DSIO_TEXREG2GB:
1252 DWORD reg1 = *pToken & REGMASK;
1253 DWORD reg2 = *++pToken & REGMASK;
1254 sprintf(tmpLine, "MOV TMP.r, T%lu.g;\n", reg2);
1255 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1256 sprintf(tmpLine, "MOV TMP.g, T%lu.b;\n", reg2);
1257 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1258 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1259 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1266 DWORD reg1 = *pToken & REGMASK;
1267 DWORD reg2 = *++pToken & REGMASK;
1269 /* FIXME: Should apply the BUMPMAPENV matrix */
1270 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1271 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1272 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1273 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1278 case D3DSIO_TEXM3x3PAD:
1280 DWORD reg = *pToken & REGMASK;
1282 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1283 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1285 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1286 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1292 case D3DSIO_TEXM3x3TEX:
1294 DWORD reg = *pToken & REGMASK;
1296 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1297 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1300 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1301 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1303 /* Cubemap textures will be more used than 3D ones. */
1304 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1305 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1310 case D3DSIO_TEXM3x3VSPEC:
1312 DWORD reg = *pToken & REGMASK;
1314 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1315 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1317 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1318 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1320 /* Construct the eye-ray vector from w coordinates */
1321 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1322 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1323 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1324 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1325 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1326 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1328 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1329 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;\n");
1330 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1331 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1332 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1333 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1334 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1336 /* Cubemap textures will be more used than 3D ones. */
1337 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1338 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1344 case D3DSIO_TEXM3x3SPEC:
1346 DWORD reg = *pToken & REGMASK;
1347 DWORD reg3 = *(pToken + 2) & REGMASK;
1349 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants)) {
1350 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1352 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1353 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1355 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1356 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1357 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1359 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1360 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1361 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1362 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1364 /* Cubemap textures will be more used than 3D ones. */
1365 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1366 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1374 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1375 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1377 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1379 pToken += curOpcode->num_params; /* maybe + 1 */
1383 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1384 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1386 case D3DSPDM_SATURATE: saturate = TRUE; break;
1387 #if 0 /* as yet unhandled modifiers */
1388 case D3DSPDM_CENTROID: centroid = TRUE; break;
1389 case D3DSPDM_PP: partialpresision = TRUE; break;
1390 case D3DSPDM_X2: X2 = TRUE; break;
1391 case D3DSPDM_X4: X4 = TRUE; break;
1392 case D3DSPDM_X8: X8 = TRUE; break;
1393 case D3DSPDM_D2: D2 = TRUE; break;
1394 case D3DSPDM_D4: D4 = TRUE; break;
1395 case D3DSPDM_D8: D8 = TRUE; break;
1398 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
1402 /* Generate input and output registers */
1403 if (curOpcode->num_params > 0) {
1405 char operands[4][100];
1409 TRACE("(%p): Opcode has %d params\n", This, curOpcode->num_params);
1411 /* Generate lines that handle input modifier computation */
1412 for (i = 1; i < curOpcode->num_params; ++i) {
1413 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1414 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpOp, This->constants)) {
1415 addline(&lineNum, pgmStr, &pgmLength, tmpOp);
1419 /* Handle saturation only when no shift is present in the output modifier */
1420 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
1423 /* Handle output register */
1424 get_register_name(*pToken, tmpOp, This->constants);
1425 strcpy(operands[0], tmpOp);
1426 get_write_mask(*pToken, tmpOp);
1427 strcat(operands[0], tmpOp);
1429 /* This function works because of side effects from gen_input_modifier_line */
1430 /* Handle input registers */
1431 for (i = 1; i < curOpcode->num_params; ++i) {
1432 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1433 strcpy(operands[i], regs[i - 1]);
1434 get_input_register_swizzle(*(pToken + i), swzstring);
1435 strcat(operands[i], swzstring);
1438 switch(curOpcode->opcode) {
1440 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
1443 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
1444 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1445 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
1449 strcat(tmpLine, "_SAT");
1450 strcat(tmpLine, " ");
1451 strcat(tmpLine, operands[0]);
1452 for (i = 1; i < curOpcode->num_params; i++) {
1453 strcat(tmpLine, ", ");
1454 strcat(tmpLine, operands[i]);
1456 strcat(tmpLine,";\n");
1458 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1459 pToken += curOpcode->num_params;
1461 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1462 if (curOpcode->num_params > 0) {
1463 DWORD param = *(pInstr + 1);
1464 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1466 /* Generate a line that handle the output modifier computation */
1468 char write_mask[20];
1469 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1470 get_register_name(param, regstr, This->constants);
1471 get_write_mask(param, write_mask);
1472 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
1473 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1479 /* TODO: What about result.depth? */
1480 strcpy(tmpLine, "MOV result.color, R0;\n");
1481 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1483 strcpy(tmpLine, "END\n");
1484 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1487 /* finally null terminate the pgmStr*/
1488 pgmStr[pgmLength] = 0;
/* NOTE(review): the gate tests ARB_VERTEX_PROGRAM although the program is bound to GL_FRAGMENT_PROGRAM_ARB below - confirm this is the intended extension check. */
1489 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1490 /* Create the hw shader */
1492 /* pgmStr sometimes gets too long for a normal TRACE */
1493 TRACE("Generated program:\n");
1494 if (TRACE_ON(d3d_shader)) {
1495 fprintf(stderr, "%s\n", pgmStr);
1498 /* TODO: change to resource.glObjectHandel or something like that */
1499 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
1501 TRACE("Creating a hw pixel shader, prg=%d\n", This->baseShader.prgId);
1502 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->baseShader.prgId));
1504 TRACE("Created hw pixel shader, prg=%d\n", This->baseShader.prgId);
1505 /* Create the program and check for errors */
1506 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
/* Only GL_INVALID_OPERATION is treated as a load failure; the program id is then replaced by -1. */
1507 if (glGetError() == GL_INVALID_OPERATION) {
1509 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1510 FIXME("HW PixelShader Error at position %d: %s\n",
1511 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1512 This->baseShader.prgId = -1;
1515 #if 1 /* if we're using the data buffer of the device then we don't need to free it */
1516 HeapFree(GetProcessHeap(), 0, pgmStr);
/**
 * Trace a textual form of one pixel shader parameter token.
 *
 * The register number is taken from D3DSP_REGNUM_MASK and the register type
 * from the D3DSP_REGTYPE field combined with the EXTENDED_REG bits. The
 * negating source modifiers (NEG, BIASNEG, SIGNNEG, X2NEG) and COMP are
 * tested before the register name is printed; the register name is then
 * chosen by register type.
 *
 * @param param  the parameter token to describe
 * @param input  callers in this file pass 0 for the first (destination)
 *               parameter and 1 for the following ones; presumably this
 *               selects between the write-mask output and the
 *               modifier/swizzle output below - confirm
 *
 * NOTE(review): rastout_reg_names has five entries and is indexed with the
 * unchecked register number - confirm the token can never exceed that range.
 */
1520 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1521 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1522 static const char swizzle_reg_chars[] = "rgba";
1524 /* the unknown mask is for bits not yet accounted for by any other mask... */
1525 #define UNKNOWN_MASK 0xC000
1527 /* for registers above 7 we have to add on bits 11 and 12 to get the correct register */
1528 #define EXTENDED_REG 0x1800
1530 DWORD reg = param & D3DSP_REGNUM_MASK;
1531 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
1534 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1535 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1536 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1537 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1539 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1543 switch (regtype /* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1551 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1554 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1557 case D3DSPR_RASTOUT:
1558 TRACE("%s", rastout_reg_names[reg]);
1560 case D3DSPR_ATTROUT:
1561 TRACE("oD%lu", reg);
1563 case D3DSPR_TEXCRDOUT:
1564 TRACE("oT%lu", reg);
1566 case D3DSPR_CONSTINT:
1567 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1569 case D3DSPR_CONSTBOOL:
1570 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1576 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1583 /** operand output */
1585 * for better debugging traces it's done into opcode dump code
1586 * @see pshader_program_dump_opcode
1587 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1588 DWORD mask = param & D3DSP_DSTMOD_MASK;
1590 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1592 TRACE("_unhandled_modifier(0x%08lx)", mask);
1595 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1596 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1598 TRACE("_x%u", 1 << shift);
1602 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1604 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1605 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1606 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1607 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1610 /** operand input */
1611 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1612 DWORD swizzle_r = swizzle & 0x03;
1613 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1614 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1615 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1617 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1618 DWORD mask = param & D3DSP_SRCMOD_MASK;
1619 /*TRACE("_modifier(0x%08lx) ", mask);*/
1621 case D3DSPSM_NONE: break;
1622 case D3DSPSM_NEG: break;
1623 case D3DSPSM_BIAS: TRACE("_bias"); break;
1624 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1625 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1626 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1627 case D3DSPSM_COMP: break;
1628 case D3DSPSM_X2: TRACE("_x2"); break;
1629 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1630 case D3DSPSM_DZ: TRACE("_dz"); break;
1631 case D3DSPSM_DW: TRACE("_dw"); break;
1633 TRACE("_unknown(0x%08lx)", mask);
1638 * swizzle bits fields:
1641 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1642 if (swizzle_r == swizzle_g &&
1643 swizzle_r == swizzle_b &&
1644 swizzle_r == swizzle_a) {
1645 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1648 swizzle_reg_chars[swizzle_r],
1649 swizzle_reg_chars[swizzle_g],
1650 swizzle_reg_chars[swizzle_b],
1651 swizzle_reg_chars[swizzle_a]);
/**
 * Trace the usage name carried by a dcl token.
 *
 * The low 16 bits of the token select the usage (D3DDECLUSAGE_*). For
 * position, normal, texcoord and positionT, bits 16-19 are printed as an
 * index after the name. For colour, index 0 is printed as "color" and any
 * other index as "specular" followed by index - 1. Unknown usages are
 * reported through FIXME.
 *
 * @param This   the pixel shader (not referenced by the visible statements)
 * @param token  the dcl usage token
 */
1657 inline static void pshader_program_dump_decl_usage(IWineD3DPixelShaderImpl *This, DWORD token) {
1659 switch(token & 0xFFFF) {
1660 case D3DDECLUSAGE_POSITION:
1661 TRACE("%s%ld ", "position",(token & 0xF0000) >> 16);
1663 case D3DDECLUSAGE_BLENDINDICES:
1664 TRACE("%s ", "blend");
1666 case D3DDECLUSAGE_BLENDWEIGHT:
1667 TRACE("%s ", "weight");
1669 case D3DDECLUSAGE_NORMAL:
1670 TRACE("%s%ld ", "normal",(token & 0xF0000) >> 16);
1672 case D3DDECLUSAGE_PSIZE:
1673 TRACE("%s ", "psize");
1675 case D3DDECLUSAGE_COLOR:
1676 if((token & 0xF0000) >> 16 == 0) {
1677 TRACE("%s ", "color");
1679 TRACE("%s%ld ", "specular", ((token & 0xF0000) >> 16) - 1);
1682 case D3DDECLUSAGE_TEXCOORD:
1683 TRACE("%s%ld ", "texture", (token & 0xF0000) >> 16);
1685 case D3DDECLUSAGE_TANGENT:
1686 TRACE("%s ", "tangent");
1688 case D3DDECLUSAGE_BINORMAL:
1689 TRACE("%s ", "binormal");
1691 case D3DDECLUSAGE_TESSFACTOR:
1692 TRACE("%s ", "tessfactor");
1694 case D3DDECLUSAGE_POSITIONT:
1695 TRACE("%s%ld ", "positionT",(token & 0xF0000) >> 16);
1697 case D3DDECLUSAGE_FOG:
1698 TRACE("%s ", "fog");
1700 case D3DDECLUSAGE_DEPTH:
1701 TRACE("%s ", "depth");
1703 case D3DDECLUSAGE_SAMPLE:
1704 TRACE("%s ", "sample");
1707 FIXME("Unrecognised dcl %08lx", token & 0xFFFF);
/**
 * Attach a shader token stream to the pixel shader object.
 *
 * The stream is walked once up to D3DPS_END(): the version token is stored
 * in baseShader.version, comment tokens are skipped, and every instruction
 * is traced in a readable form (dcl and def have dedicated output, other
 * opcodes print their name followed by each parameter). The walk also
 * accumulates the stream length, from which baseShader.functionLength is
 * derived in bytes.
 *
 * When a function is given and wined3d_settings.vs_mode is VS_HW, the ARB
 * program is generated. Finally the token stream is copied into a new heap
 * block owned by the shader, or baseShader.function is set to NULL when no
 * function was given.
 *
 * @param iface      the pixel shader (cast to IWineD3DPixelShaderImpl)
 * @param pFunction  shader token stream, may be NULL
 * @return presumably D3D_OK, as the final trace states - confirm
 *
 * NOTE(review): hardware generation is keyed on vs_mode (a vertex shader
 * setting by its name) - confirm that is intended for pixel shaders.
 * NOTE(review): no check of the HeapAlloc result is visible before the
 * memcpy - confirm.
 */
1711 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1712 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1713 const DWORD* pToken = pFunction;
1714 const SHADER_OPCODE *curOpcode = NULL;
1717 TRACE("(%p) : Parsing programme\n", This);
1719 if (NULL != pToken) {
1720 while (D3DPS_END() != *pToken) {
1721 if (pshader_is_version_token(*pToken)) { /** version */
1722 This->baseShader.version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1723 TRACE("ps_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1728 if (pshader_is_comment_token(*pToken)) { /** comment */
1729 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1731 TRACE("//%s\n", (char*)pToken);
1732 pToken += comment_len;
1733 len += comment_len + 1;
1736 if (!This->baseShader.version) {
1737 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1739 curOpcode = pshader_program_get_opcode(This, *pToken);
1742 if (NULL == curOpcode) {
1744 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1745 while (*pToken & 0x80000000) {
1747 /* unknown current opcode ... */
1748 TRACE("unrecognized opcode: %08lx", *pToken);
1755 if (curOpcode->opcode == D3DSIO_DCL) {
1756 pshader_program_dump_decl_usage(This, *pToken);
1759 pshader_program_dump_ps_param(*pToken, 0);
1763 if (curOpcode->opcode == D3DSIO_DEF) {
1764 TRACE("def c%lu = ", *pToken & 0xFF);
1767 TRACE("%f ,", *(float *)pToken);
1770 TRACE("%f ,", *(float *)pToken);
1773 TRACE("%f ,", *(float *)pToken);
1776 TRACE("%f", *(float *)pToken);
1780 TRACE("%s ", curOpcode->name);
1781 if (curOpcode->num_params > 0) {
1782 pshader_program_dump_ps_param(*pToken, 0);
1785 for (i = 1; i < curOpcode->num_params; ++i) {
1787 pshader_program_dump_ps_param(*pToken, 1);
1796 This->baseShader.functionLength = (len + 1) * sizeof(DWORD);
1798 This->baseShader.functionLength = 1; /* no Function defined use fixed function vertex processing */
1801 /* Generate HW shader if needed */
1802 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1803 TRACE("(%p) : Generating hardware program\n", This);
1805 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1809 TRACE("(%p) : Copying the function\n", This);
1810 /* copy the function ... because it will certainly be released by application */
1811 if (NULL != pFunction) {
1812 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1813 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1815 This->baseShader.function = NULL;
1818 /* TODO: Some proper return values for failures */
1819 TRACE("(%p) : Returning D3D_OK\n", This);
1823 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1825 /*** IUnknown methods ***/
1826 IWineD3DPixelShaderImpl_QueryInterface,
1827 IWineD3DPixelShaderImpl_AddRef,
1828 IWineD3DPixelShaderImpl_Release,
1829 /*** IWineD3DBase methods ***/
1830 IWineD3DPixelShaderImpl_GetParent,
1831 /*** IWineD3DBaseShader methods ***/
1832 IWineD3DPixelShaderImpl_SetFunction,
1833 /*** IWineD3DPixelShader methods ***/
1834 IWineD3DPixelShaderImpl_GetDevice,
1835 IWineD3DPixelShaderImpl_GetFunction