2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
37 # define TRACE_VSVECTOR(name)
40 /* The maximum size of the program */
43 #define GLNAME_REQUIRE_GLSL ((const char *)1)
44 /* *******************************************
45 IWineD3DPixelShader IUnknown parts follow
46 ******************************************* */
47 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
49 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
50 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
51 if (IsEqualGUID(riid, &IID_IUnknown)
52 || IsEqualGUID(riid, &IID_IWineD3DBase)
53 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
54 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
55 IUnknown_AddRef(iface);
62 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
63 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
64 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
65 return InterlockedIncrement(&This->ref);
68 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
69 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
71 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
72 ref = InterlockedDecrement(&This->ref);
74 HeapFree(GetProcessHeap(), 0, This);
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
   when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
85 /* *******************************************
86 IWineD3DPixelShader IWineD3DPixelShader parts follow
87 ******************************************* */
89 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
90 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
92 *parent = This->parent;
93 IUnknown_AddRef(*parent);
94 TRACE("(%p) : returning %p\n", This, *parent);
98 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
99 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
100 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
101 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
102 TRACE("(%p) returning %p\n", This, *pDevice);
107 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
108 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
109 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
112 *pSizeOfData = This->baseShader.functionLength;
115 if (*pSizeOfData < This->baseShader.functionLength) {
116 *pSizeOfData = This->baseShader.functionLength;
117 return WINED3DERR_MOREDATA;
119 if (NULL == This->baseShader.function) { /* no function defined */
120 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
121 (*(DWORD **) pData) = NULL;
123 if (This->baseShader.functionLength == 0) {
126 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
127 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
132 /*******************************
133 * pshader functions software VM
136 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
137 d->x = s0->x + s1->x;
138 d->y = s0->y + s1->y;
139 d->z = s0->z + s1->z;
140 d->w = s0->w + s1->w;
141 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
142 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
145 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
146 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
147 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
148 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
151 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
152 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
153 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
154 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
157 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
159 d->y = s0->y * s1->y;
162 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
163 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
166 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
172 tmp.f = floorf(s0->w);
173 d->x = powf(2.0f, tmp.f);
174 d->y = s0->w - tmp.f;
175 tmp.f = powf(2.0f, s0->w);
176 tmp.d &= 0xFFFFFF00U;
179 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
180 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
183 void pshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
185 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
186 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
188 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
189 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
192 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
193 float tmp_f = fabsf(s0->w);
194 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
195 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
196 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
199 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
200 d->x = s0->x * s1->x + s2->x;
201 d->y = s0->y * s1->y + s2->y;
202 d->z = s0->z * s1->z + s2->z;
203 d->w = s0->w * s1->w + s2->w;
204 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
205 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
208 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
209 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
210 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
211 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
212 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
213 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
214 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
217 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
218 d->x = (s0->x < s1->x) ? s0->x : s1->x;
219 d->y = (s0->y < s1->y) ? s0->y : s1->y;
220 d->z = (s0->z < s1->z) ? s0->z : s1->z;
221 d->w = (s0->w < s1->w) ? s0->w : s1->w;
222 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
223 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
226 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
231 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
232 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
235 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
236 d->x = s0->x * s1->x;
237 d->y = s0->y * s1->y;
238 d->z = s0->z * s1->z;
239 d->w = s0->w * s1->w;
240 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
241 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM "nop": performs no computation; only the (optional) trace
 * line is emitted.
 */
void pshader_nop(void)
{
    PSTRACE(("executing nop\n"));
}
249 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
250 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
251 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
252 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
255 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
256 float tmp_f = fabsf(s0->w);
257 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
258 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
259 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
262 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
263 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
264 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
265 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
266 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
267 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
268 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
271 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
272 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
273 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
274 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
275 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
276 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
277 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
280 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
281 d->x = s0->x - s1->x;
282 d->y = s0->y - s1->y;
283 d->z = s0->z - s1->z;
284 d->w = s0->w - s1->w;
285 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
286 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
290 * Version 1.1 specific
293 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
294 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
295 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
296 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
299 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
300 float tmp_f = fabsf(s0->w);
301 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
302 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
303 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
306 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
307 d->x = s0->x - floorf(s0->x);
308 d->y = s0->y - floorf(s0->y);
311 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
312 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
315 typedef FLOAT D3DMATRIX44[4][4];
316 typedef FLOAT D3DMATRIX43[4][3];
317 typedef FLOAT D3DMATRIX34[3][4];
318 typedef FLOAT D3DMATRIX33[3][3];
319 typedef FLOAT D3DMATRIX23[2][3];
321 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
323 * Buggy CODE: here only if cast not work for copy/paste
324 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
325 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
326 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
327 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
328 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
329 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
330 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
332 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
333 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
334 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
335 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
336 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
337 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
338 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
339 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
342 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
343 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
344 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
345 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
347 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
348 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
349 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
350 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
353 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
354 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
355 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
356 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
357 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
358 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
359 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
360 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
361 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
364 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
365 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
366 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
367 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
369 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
370 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
371 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
372 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
375 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
377 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
378 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
384 * Version 2.0 specific
386 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
387 d->x = s0->x * (s1->x - s2->x) + s2->x;
388 d->y = s0->y * (s1->y - s2->y) + s2->y;
389 d->z = s0->z * (s1->z - s2->z) + s2->z;
390 d->w = s0->w * (s1->w - s2->w) + s2->w;
393 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
394 d->x = s0->y * s1->z - s0->z * s1->y;
395 d->y = s0->z * s1->x - s0->x * s1->z;
396 d->z = s0->x * s1->y - s0->y * s1->x;
397 d->w = 0.9f; /* w is undefined, so set it to something safeish */
399 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
400 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
403 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
408 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
409 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
413 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
417 void pshader_texkill(WINED3DSHADERVECTOR* d) {
421 void pshader_tex(WINED3DSHADERVECTOR* d) {
424 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
428 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
432 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
436 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
440 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
444 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
448 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
452 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
456 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
460 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
464 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
468 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
472 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
476 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
477 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
481 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
485 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
489 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
493 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
497 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
501 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
505 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
509 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
513 void pshader_call(WINED3DSHADERVECTOR* d) {
517 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
521 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
525 void pshader_ret(void) {
529 void pshader_endloop(void) {
533 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
537 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
541 void pshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
545 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
549 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
553 void pshader_rep(WINED3DSHADERVECTOR* d) {
557 void pshader_endrep(void) {
561 void pshader_if(WINED3DSHADERVECTOR* d) {
565 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
569 void pshader_else(void) {
573 void pshader_label(WINED3DSHADERVECTOR* d) {
577 void pshader_endif(void) {
581 void pshader_break(void) {
585 void pshader_breakc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
589 void pshader_breakp(WINED3DSHADERVECTOR* d) {
593 void pshader_mova(WINED3DSHADERVECTOR* d) {
597 void pshader_defb(WINED3DSHADERVECTOR* d) {
601 void pshader_defi(WINED3DSHADERVECTOR* d) {
605 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
609 void pshader_dsx(WINED3DSHADERVECTOR* d) {
613 void pshader_dsy(WINED3DSHADERVECTOR* d) {
617 void pshader_texldd(WINED3DSHADERVECTOR* d) {
621 void pshader_setp(WINED3DSHADERVECTOR* d) {
625 void pshader_texldl(WINED3DSHADERVECTOR* d) {
/* log, exp, frc and m*x* seem to be macro instructions ... to be checked */
632 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = {
633 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
634 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
635 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
636 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
637 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
638 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
639 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
640 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
641 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
642 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
643 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
644 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
645 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
646 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
647 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
648 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
649 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
650 {D3DSIO_LIT, "lit", "LIT", 2, pshader_lit, 0, 0},
651 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
652 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
653 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
654 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
655 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
656 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
657 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
658 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
661 /** FIXME: use direct access so add the others opcodes as stubs */
/* DCL is a special operation */
663 {D3DSIO_DCL, "dcl", NULL, 2, pshader_dcl, 0, 0},
664 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
665 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
666 /* TODO: sng can possibly be performed as
669 {D3DSIO_SGN, "sng", NULL, 2, pshader_sng, 0, 0},
670 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
673 MUL vec.xyz, vec, tmp;
674 but I think this is better because it accounts for w properly.
680 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
681 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
683 /* Flow control - requires GLSL or software shaders */
684 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 1, pshader_rep, 0, 0},
685 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
686 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 1, pshader_if, 0, 0},
687 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
688 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 0, pshader_else, 0, 0},
689 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 0, pshader_endif, 0, 0},
690 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 0, pshader_break, 0, 0},
691 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
692 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 1, pshader_breakp, 0, 0},
693 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
694 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
695 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
696 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
697 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
698 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
700 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, pshader_mova, 0, 0},
701 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
702 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
704 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
705 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
706 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
707 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
708 {D3DSIO_TEX, "texld", "undefined", 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
709 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 3, pshader_texld, D3DPS_VERSION(2,0), -1},
710 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
711 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
712 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
715 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
716 {D3DSIO_TEXM3x3PAD, "texm3x3pad", "undefined", 2, pshader_texm3x3pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
717 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
718 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
720 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
721 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
722 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
723 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
724 /* def is a special operation */
725 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
726 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
729 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
730 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
731 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
732 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
733 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
/* TODO: dp2add can be made out of multiple instructions */
735 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
736 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
737 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
738 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
739 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
740 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
741 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
742 {0, NULL, NULL, 0, NULL, 0, 0}
746 inline static const SHADER_OPCODE* pshader_program_get_opcode(IWineD3DPixelShaderImpl *This, const DWORD code) {
748 DWORD version = This->baseShader.version;
749 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
750 const SHADER_OPCODE *shader_ins = This->baseShader.shader_ins;
752 /** TODO: use dichotomic search */
753 while (NULL != shader_ins[i].name) {
754 if (((code & D3DSI_OPCODE_MASK) == shader_ins[i].opcode) &&
755 (((hex_version >= shader_ins[i].min_version) && (hex_version <= shader_ins[i].max_version)) ||
756 ((shader_ins[i].min_version == 0) && (shader_ins[i].max_version == 0)))) {
757 return &shader_ins[i];
761 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code, code, code & D3DSI_OPCODE_MASK, version);
765 inline static BOOL pshader_is_version_token(DWORD token) {
766 return 0xFFFF0000 == (token & 0xFFFF0000);
769 inline static BOOL pshader_is_comment_token(DWORD token) {
770 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
774 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
775 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
777 DWORD reg = param & D3DSP_REGNUM_MASK;
778 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
782 sprintf(regstr, "R%lu", reg);
786 strcpy(regstr, "fragment.color.primary");
788 strcpy(regstr, "fragment.color.secondary");
793 sprintf(regstr, "C%lu", reg);
795 sprintf(regstr, "program.env[%lu]", reg);
797 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
798 sprintf(regstr,"T%lu", reg);
801 sprintf(regstr, "%s", rastout_reg_names[reg]);
804 sprintf(regstr, "oD[%lu]", reg);
806 case D3DSPR_TEXCRDOUT:
807 sprintf(regstr, "oT[%lu]", reg);
810 FIXME("Unhandled register name Type(%ld)\n", regtype);
815 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
817 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
818 strcat(write_mask, ".");
819 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
820 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
821 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
822 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
826 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
827 static const char swizzle_reg_chars[] = "rgba";
828 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
829 DWORD swizzle_x = swizzle & 0x03;
830 DWORD swizzle_y = (swizzle >> 2) & 0x03;
831 DWORD swizzle_z = (swizzle >> 4) & 0x03;
832 DWORD swizzle_w = (swizzle >> 6) & 0x03;
834 * swizzle bits fields:
838 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
839 if (swizzle_x == swizzle_y &&
840 swizzle_x == swizzle_z &&
841 swizzle_x == swizzle_w) {
842 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
844 sprintf(swzstring, ".%c%c%c%c",
845 swizzle_reg_chars[swizzle_x],
846 swizzle_reg_chars[swizzle_y],
847 swizzle_reg_chars[swizzle_z],
848 swizzle_reg_chars[swizzle_w]);
853 inline static void addline(unsigned int *lineNum, char *pgm, unsigned int *pgmLength, char *line) {
854 int lineLen = strlen(line);
855 if(lineLen + *pgmLength > PGMSIZE - 1 /* - 1 to allow a NULL at the end */) {
856 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, lineLen + *pgmLength);
859 memcpy(pgm + *pgmLength, line, lineLen);
862 *pgmLength += lineLen;
864 TRACE("GL HW (%u, %u) : %s", *lineNum, *pgmLength, line);
/* Names of the scale factors used for destination "shift" output modifiers,
 * indexed by the 4-bit shift code.
 *
 * Codes 1..4 select a multiplier (x2, x4, x8, x16) held in the components of
 * "coefmul"; codes 12..15 select a divisor (d16, d8, d4, d2) held in the
 * components of "coefdiv". Every other code, including 0 (no shift), maps to
 * the placeholder "dummy".
 *
 * The table is read-only: both the strings and the pointers to them are
 * const, so an entry cannot be reassigned by accident.
 */
static const char *const shift_tab[] = {
    "dummy",     /*  0 (none) */
    "coefmul.x", /*  1 (x2)   */
    "coefmul.y", /*  2 (x4)   */
    "coefmul.z", /*  3 (x8)   */
    "coefmul.w", /*  4 (x16)  */
    "dummy",     /*  5 (x32)  */
    "dummy",     /*  6 (x64)  */
    "dummy",     /*  7 (x128) */
    "dummy",     /*  8 (d256) */
    "dummy",     /*  9 (d128) */
    "dummy",     /* 10 (d64)  */
    "dummy",     /* 11 (d32)  */
    "coefdiv.w", /* 12 (d16)  */
    "coefdiv.z", /* 13 (d8)   */
    "coefdiv.y", /* 14 (d4)   */
    "coefdiv.x"  /* 15 (d2)   */
};
886 inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
887 /* Generate a line that does the output modifier computation */
888 sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
/*
 * Translate the source modifier of one source parameter token.
 *
 * instr:     source parameter token; the modifier is taken from the
 *            D3DSP_SRCMOD_MASK bits.
 * tmpreg:    selects the scratch register, named 'T' followed by 'A' + tmpreg
 *            (TA, TB, TC are declared in the generated program header).
 * outregstr: out, the operand text the instruction should use afterwards:
 *            the plain or negated register name, or the scratch register.
 * line:      out, the helper instruction(s) computing the modified value.
 * constants: passed through to get_register_name.
 *
 * Callers append `line` to the program only when the return value is
 * non-zero. regstr and tmpline are static buffers, so the function keeps
 * state between calls and is not reentrant.
 * NOTE(review): several case labels and the return statement are not visible
 * in this listing; the per-branch notes below describe only the emitted text.
 */
891 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
892 /* Generate a line that does the input modifier computation and return the input register to use */
893 static char regstr[256];
894 static char tmpline[256];
897 /* Assume a new line will be added */
900 /* Get register name */
901 get_register_name(instr, regstr, constants);
903 TRACE(" Register name %s\n", regstr);
904 switch (instr & D3DSP_SRCMOD_MASK) {
/* No helper instruction: use the register as is. */
906 strcpy(outregstr, regstr);
/* No helper instruction: use the negated register. */
910 sprintf(outregstr, "-%s", regstr);
/* reg - coefdiv.x (coefdiv.x is 0.5 in the generated header) */
914 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
916 case D3DSPSM_BIASNEG:
/* coefdiv.x - reg */
917 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
/* reg * coefmul.x - one.x (coefmul.x is 2 in the generated header) */
920 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
922 case D3DSPSM_SIGNNEG:
/* one.x - reg * coefmul.x */
923 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
/* one.x - reg */
926 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
/* reg + reg */
929 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
/* -(reg + reg) */
932 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
/* reg * (1 / reg.z): two instructions packed into one line buffer */
935 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
936 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
937 strcat(line, "\n"); /* Hack */
938 strcat(line, tmpline);
/* reg * (1 / reg.w): same two-instruction pattern */
941 sprintf(line, "RCP T%c, %s.w;", 'A' + tmpreg, regstr);
942 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
943 strcat(line, "\n"); /* Hack */
944 strcat(line, tmpline);
947 strcpy(outregstr, regstr);
952 /* Substitute the register name */
953 sprintf(outregstr, "T%c", 'A' + tmpreg);
/*
 * First pass over the shader tokens: record which texture and temporary
 * registers the instructions reference.
 *
 * This:      shader object, used to look up opcodes.
 * pToken:    start of the token stream; scanned until the end token.
 * tempsUsed: out, bit n is set when a D3DSPR_TEMP register n is seen.
 * texUsed:   out, bit n is set when a D3DSPR_TEXTURE register n is seen.
 *
 * Version and comment tokens are stepped over; DCL and DEF instructions are
 * skipped together with their parameters.
 * NOTE(review): the visible code only ORs bits into the two masks -- confirm
 * they are cleared before the loop, since the caller passes uninitialised
 * locals.
 * NOTE(review): the loop tests D3DVS_END() while the other token loops in
 * this file use D3DPS_END() -- presumably the same value; confirm.
 * NOTE(review): "1 << reg" is undefined for reg >= 32, and reg is only
 * masked with D3DSP_REGNUM_MASK here -- confirm the range is bounded.
 */
959 inline static void pshader_program_get_registers_used(
960 IWineD3DPixelShaderImpl *This,
961 CONST DWORD* pToken, DWORD* tempsUsed, DWORD* texUsed) {
969 while (D3DVS_END() != *pToken) {
970 CONST SHADER_OPCODE* curOpcode;
973 if (pshader_is_version_token(*pToken)) {
978 } else if (pshader_is_comment_token(*pToken)) {
/* The comment length (in tokens) is encoded in the comment token itself. */
979 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
981 pToken += comment_len;
986 curOpcode = pshader_program_get_opcode(This, *pToken);
989 /* Skip declarations (for now) */
990 if (D3DSIO_DCL == curOpcode->opcode) {
991 pToken += curOpcode->num_params;
994 /* Skip definitions (for now) */
995 } else if (D3DSIO_DEF == curOpcode->opcode) {
996 pToken += curOpcode->num_params;
999 /* Set texture registers, and temporary registers */
1003 for (i = 0; i < curOpcode->num_params; ++i) {
1004 DWORD regtype = (((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
1005 DWORD reg = (*pToken) & D3DSP_REGNUM_MASK;
1006 if (D3DSPR_TEXTURE == regtype)
1007 *texUsed |= (1 << reg);
1008 if (D3DSPR_TEMP == regtype)
1009 *tempsUsed |= (1 << reg);
1016 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/*
 * Translate a D3D pixel shader token stream into an ARB fragment program
 * string and, when the extension check passes, hand it to GL.
 *
 * iface:     the pixel shader object; its constants[] flags and
 *            baseShader.prgId are written here.
 * pFunction: shader token stream, terminated by D3DPS_END().
 *
 * Outline of the visible code:
 *  1. allocate a PGMSIZE text buffer and clear This->constants[];
 *  2. first pass (pshader_program_get_registers_used) collects bitmaps of
 *     the texture and temporary registers referenced;
 *  3. second pass walks the tokens: the version token emits the
 *     "!!ARBfp1.0" header plus TEMP/PARAM declarations, DEF emits a PARAM,
 *     the texture opcodes are expanded by hand, and everything else goes
 *     through the generic "[inst] [dst] [src]*" path;
 *  4. "MOV result.color, R0" and "END" close the program, which is then
 *     passed to glProgramStringARB and the text buffer is freed.
 *
 * NOTE(review): many short lines (case labels, pointer increments, closing
 * braces) are not visible in this listing; the notes below describe only the
 * statements that are.
 */
1017 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1018 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1019 const DWORD *pToken = pFunction;
1020 const SHADER_OPCODE *curOpcode = NULL;
1021 const DWORD *pInstr;
1023 unsigned lineNum = 0; /* The line number of the generated program (for logging) */
1024 char *pgmStr = NULL; /* A pointer to the program data generated by this function */
1026 #if 0 /* TODO: loop register (just another address register ) */
1027 BOOL hasLoops = FALSE;
1030 BOOL saturate; /* clamp to 0.0 -> 1.0*/
1031 int row = 0; /* not sure, something to do with macros? */
1033 int version = 0; /* The version of the shader */
1035 /* Keep a running length for pgmStr so that we don't have to calculate strlen every time we concatenate */
1036 unsigned int pgmLength = 0;
1038 /* Keep bitmaps of used temporary and texture registers */
1039 DWORD tempsUsed, texUsed;
1041 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
1042 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
1043 if (This->device->fixupVertexBufferSize < PGMSIZE) {
1044 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
1045 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
1046 This->fixupVertexBufferSize = PGMSIZE;
1047 This->fixupVertexBuffer[0] = 0;
1049 pgmStr = This->device->fixupVertexBuffer;
/* NOTE(review): the HeapAlloc result is used below without a NULL check. */
1051 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, PGMSIZE); /* 64kb should be enough */
1054 /* TODO: Think about using a first pass to work out what's required for the second pass. */
/* constants[n] is set to 1 further down when a DEF instruction defines constant n. */
1055 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
1056 This->constants[i] = 0;
1058 /* First pass: figure out which temporary and texture registers are used */
1059 pshader_program_get_registers_used(This, pToken, &tempsUsed, &texUsed);
1060 TRACE("Texture registers used: %#lx, Temp registers used %#lx\n", texUsed, tempsUsed);
1062 /* TODO: check register usage against GL/Directx limits, and fail if they're exceeded */
1064 /* Second pass, process opcodes */
/* NOTE(review): pToken was already handed to the first pass above, so this NULL test comes after its first use. */
1065 if (NULL != pToken) {
1066 while (D3DPS_END() != *pToken) {
1067 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1069 instructionSize = pToken & SIZEBITS >> 27;
1072 if (pshader_is_version_token(*pToken)) { /** version */
1077 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1078 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1080 TRACE("found version token ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1082 /* Each release of pixel shaders has had different numbers of temp registers */
1087 case 13:numTemps=12;
1091 case 14: numTemps=12;
1095 case 20: numTemps=12;
1098 FIXME("No work done yet to support ps2.0 in hw\n");
1100 case 30: numTemps=32;
1103 FIXME("No work done yet to support ps3.0 in hw\n");
1109 FIXME("Unrecognized pixel shader version!\n");
/* Program header: every used texture register Tn and temporary Rn becomes an ARB TEMP. */
1112 /* FIXME: if jumps are used, use GLSL, else use ARB_fragment_program */
1113 strcpy(tmpLine, "!!ARBfp1.0\n");
1114 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1116 for(i = 0; i < numTex; i++) {
1117 if (texUsed & (1 << i)) {
1118 sprintf(tmpLine, "TEMP T%lu;\n", i);
1119 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1123 for(i = 0; i < numTemps; i++) {
1124 if (tempsUsed & (1 << i)) {
1125 sprintf(tmpLine, "TEMP R%lu;\n", i);
1126 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Scratch registers: TMP/TMP2 for the texture opcodes, TA/TB/TC for source modifiers. */
1130 sprintf(tmpLine, "TEMP TMP;\n");
1131 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1132 sprintf(tmpLine, "TEMP TMP2;\n");
1133 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1134 sprintf(tmpLine, "TEMP TA;\n");
1135 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1136 sprintf(tmpLine, "TEMP TB;\n");
1137 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1138 sprintf(tmpLine, "TEMP TC;\n");
1139 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Constants referenced by shift_tab and by the source-modifier helper lines. */
1141 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1142 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1143 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1144 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1145 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1146 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Preload each used texture register with its texture coordinate. */
1148 for(i = 0; i < numTex; i++) {
1149 if (texUsed & (1 << i)) {
1150 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1151 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1159 if (pshader_is_comment_token(*pToken)) { /** comment */
1160 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1162 TRACE("#%s\n", (char*)pToken);
1163 pToken += comment_len;
1167 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1171 curOpcode = pshader_program_get_opcode(This, *pToken);
1173 if (NULL == curOpcode) {
1174 /* unknown current opcode ... (shouldn't be any!) */
1175 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1176 FIXME("unrecognized opcode: %08lx\n", *pToken);
1179 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1180 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1181 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1182 pToken += curOpcode->num_params;
1184 } else if (D3DSIO_DEF == curOpcode->opcode) {
1186 /* Handle definitions here, they don't fit well with the
1187 * other instructions below [for now ] */
1189 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1191 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1192 curOpcode->name, curOpcode->glname, curOpcode->num_params);
/* The four tokens after the register token are reinterpreted as floats. */
1194 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1195 *((const float *)(pToken + 1)),
1196 *((const float *)(pToken + 2)),
1197 *((const float *)(pToken + 3)),
1198 *((const float *)(pToken + 4)) );
1200 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* NOTE(review): reg is not checked against WINED3D_PSHADER_MAX_CONSTANTS before indexing. */
1202 This->constants[reg] = 1;
1208 /* Common processing: [inst] [dst] [src]* */
1211 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1212 curOpcode->name, curOpcode->glname, curOpcode->num_params);
1216 /* Build opcode for GL vertex_program */
1217 switch (curOpcode->opcode) {
1244 case D3DSIO_TEXKILL:
1245 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1246 strcpy(tmpLine, curOpcode->glname);
/* Texture sample: versions other than 1.4 sample into Tn from Tn; 1.4 samples into Rn from an explicit source. */
1251 get_write_mask(*pToken, tmp);
1252 if (version != 14) {
1253 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1254 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg, tmp, reg, reg);
1255 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1259 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1260 if (gen_input_modifier_line(*++pToken, 0, reg2, tmpLine, This->constants)) {
1261 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1263 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1, tmp, reg2, reg1);
1264 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1270 case D3DSIO_TEXCOORD:
1273 get_write_mask(*pToken, tmp);
1274 if (version != 14) {
1275 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1276 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1277 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1280 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1281 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1282 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1283 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* The TEXM3x2 and TEXM3x3 opcodes accumulate one DP3 result per instruction into a component of TMP; the final one samples with TMP. */
1289 case D3DSIO_TEXM3x2PAD:
1291 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1293 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1294 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1296 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1297 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1302 case D3DSIO_TEXM3x2TEX:
1304 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1306 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1307 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1309 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1310 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1311 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1312 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Sample texture reg1 using the alpha and red components of T[reg2] as coordinates. */
1317 case D3DSIO_TEXREG2AR:
1319 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1320 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1321 sprintf(tmpLine, "MOV TMP.r, T%lu.a;\n", reg2);
1322 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1323 sprintf(tmpLine, "MOV TMP.g, T%lu.r;\n", reg2);
1324 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1325 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1326 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Same as above with the green and blue components. */
1331 case D3DSIO_TEXREG2GB:
1333 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1334 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1335 sprintf(tmpLine, "MOV TMP.r, T%lu.g;\n", reg2);
1336 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1337 sprintf(tmpLine, "MOV TMP.g, T%lu.b;\n", reg2);
1338 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1339 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1340 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1347 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1348 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1350 /* FIXME: Should apply the BUMPMAPENV matrix */
1351 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1352 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1353 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1354 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1359 case D3DSIO_TEXM3x3PAD:
1361 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1363 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1364 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* row picks the TMP component ('x' + row) written by this PAD instruction. */
1366 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1367 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1373 case D3DSIO_TEXM3x3TEX:
1375 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1377 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1378 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1381 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1382 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1384 /* Cubemap textures will be more used than 3D ones. */
1385 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1386 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1391 case D3DSIO_TEXM3x3VSPEC:
1393 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1395 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1396 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1398 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1399 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1401 /* Construct the eye-ray vector from w coordinates */
/* tcw[0] and tcw[1] presumably hold the registers of the two preceding PAD rows -- their assignment is not visible here; confirm. */
1402 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1403 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1404 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1405 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1406 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1407 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1409 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1410 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;\n");
1411 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1412 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1413 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1414 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1415 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1417 /* Cubemap textures will be more used than 3D ones. */
1418 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1419 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1425 case D3DSIO_TEXM3x3SPEC:
1427 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1428 DWORD reg3 = *(pToken + 2) & D3DSP_REGNUM_MASK;
1430 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants)) {
1431 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1433 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1434 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1436 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
/* NOTE(review): the eye vector is written as "C[n]" here while DEF constants are declared as "Cn" -- confirm C[] is declared somewhere. */
1437 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1438 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1440 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1441 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1442 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1443 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1445 /* Cubemap textures will be more used than 3D ones. */
1446 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1447 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1455 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1456 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1458 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1460 pToken += curOpcode->num_params;
/* Destination modifiers: only saturation is acted on by the enabled code. */
1464 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1465 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1467 case D3DSPDM_SATURATE: saturate = TRUE; break;
1468 #if 0 /* as yet unhandled modifiers */
1469 case D3DSPDM_CENTROID: centroid = TRUE; break;
1470 case D3DSPDM_PP: partialpresision = TRUE; break;
1471 case D3DSPDM_X2: X2 = TRUE; break;
1472 case D3DSPDM_X4: X4 = TRUE; break;
1473 case D3DSPDM_X8: X8 = TRUE; break;
1474 case D3DSPDM_D2: D2 = TRUE; break;
1475 case D3DSPDM_D4: D4 = TRUE; break;
1476 case D3DSPDM_D8: D8 = TRUE; break;
1479 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
1483 /* Generate input and output registers */
1484 if (curOpcode->num_params > 0) {
/* operands[0] is the destination, operands[1..] the sources; each slot holds at most 99 characters. */
1486 char operands[4][100];
1491 /* Generate lines that handle input modifier computation */
1492 for (i = 1; i < curOpcode->num_params; ++i) {
1493 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1494 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpOp, This->constants)) {
1495 addline(&lineNum, pgmStr, &pgmLength, tmpOp);
1499 /* Handle saturation only when no shift is present in the output modifier */
1500 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
1503 /* Handle output register */
1504 get_register_name(*pToken, tmpOp, This->constants);
1505 strcpy(operands[0], tmpOp);
1506 get_write_mask(*pToken, tmpOp);
1507 strcat(operands[0], tmpOp);
1509 /* This function works because of side effects from gen_input_modifier_line */
1510 /* Handle input registers */
1511 for (i = 1; i < curOpcode->num_params; ++i) {
1512 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1513 strcpy(operands[i], regs[i - 1]);
1514 get_input_register_swizzle(*(pToken + i), swzstring);
1515 strcat(operands[i], swzstring);
1518 switch(curOpcode->opcode) {
/* Note the swapped order of the last two source operands in this CMP form. */
1520 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
/* TMP = coefdiv.x - src0 (coefdiv.x is 0.5); NOTE(review): unlike the other emitted lines this one has no trailing "\n". */
1523 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
1524 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1525 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
/* Generic form: "<glname>[_SAT] dst, src1, src2, ...;" appended to the opcode name already in tmpLine. */
1529 strcat(tmpLine, "_SAT");
1530 strcat(tmpLine, " ");
1531 strcat(tmpLine, operands[0]);
1532 for (i = 1; i < curOpcode->num_params; i++) {
1533 strcat(tmpLine, ", ");
1534 strcat(tmpLine, operands[i]);
1536 strcat(tmpLine,";\n");
1538 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1539 pToken += curOpcode->num_params;
1541 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1542 if (curOpcode->num_params > 0) {
1543 DWORD param = *(pInstr + 1);
1544 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1546 /* Generate a line that handle the output modifier computation */
1548 char write_mask[20];
1549 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1550 get_register_name(param, regstr, This->constants);
1551 get_write_mask(param, write_mask);
1552 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
1553 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Program trailer: R0 is copied to the colour output. */
1559 /* TODO: What about result.depth? */
1560 strcpy(tmpLine, "MOV result.color, R0;\n");
1561 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1563 strcpy(tmpLine, "END\n");
1564 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1567 /* finally null terminate the pgmStr*/
1568 pgmStr[pgmLength] = 0;
/* NOTE(review): this tests ARB_VERTEX_PROGRAM although a GL_FRAGMENT_PROGRAM_ARB program is created below -- presumably ARB_FRAGMENT_PROGRAM was intended; confirm. */
1569 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1570 /* Create the hw shader */
1572 /* pgmStr sometimes gets too long for a normal TRACE */
1573 TRACE("Generated program:\n");
1574 if (TRACE_ON(d3d_shader)) {
1575 fprintf(stderr, "%s\n", pgmStr);
1578 /* TODO: change to resource.glObjectHandel or something like that */
1579 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
1581 TRACE("Creating a hw pixel shader, prg=%d\n", This->baseShader.prgId);
1582 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->baseShader.prgId));
1584 TRACE("Created hw pixel shader, prg=%d\n", This->baseShader.prgId);
1585 /* Create the program and check for errors */
1586 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
/* Only GL_INVALID_OPERATION is treated as a compile failure; prgId is then set to -1. */
1587 if (glGetError() == GL_INVALID_OPERATION) {
1589 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1590 FIXME("HW PixelShader Error at position %d: %s\n",
1591 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1592 This->baseShader.prgId = -1;
1595 #if 1 /* if were using the data buffer of device then we don't need to free it */
1596 HeapFree(GetProcessHeap(), 0, pgmStr);
/*
 * Trace a human-readable form of one shader parameter token.
 *
 * param: the parameter token to print.
 * input: 0 for a destination operand (the write mask is printed), non-zero
 *        for a source operand (source modifier and swizzle are printed).
 *
 * Output goes through TRACE only; nothing is returned or stored.
 * NOTE(review): part of this function sits inside a block comment whose
 * delimiters are not visible in this listing -- check the upstream file
 * before editing the destination-modifier section.
 */
1600 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1601 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1602 static const char swizzle_reg_chars[] = "rgba";
1604 /* the unknown mask is for bits not yet accounted for by any other mask... */
1605 #define UNKNOWN_MASK 0xC000
1607 /* for registers above 7 we have to add on bits 11 and 12 to get the correct register */
1608 #define EXTENDED_REG 0x1800
1610 DWORD reg = param & D3DSP_REGNUM_MASK;
/* The register type combines the regular type field with the two extended bits. */
1611 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
/* Negating and complementing source modifiers get a prefix printed before the register name. */
1614 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1615 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1616 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1617 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1619 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1623 switch (regtype /* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1631 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1634 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1637 case D3DSPR_RASTOUT:
/* NOTE(review): rastout_reg_names has five entries and reg is not range-checked here. */
1638 TRACE("%s", rastout_reg_names[reg]);
1640 case D3DSPR_ATTROUT:
1641 TRACE("oD%lu", reg);
1643 case D3DSPR_TEXCRDOUT:
1644 TRACE("oT%lu", reg);
1646 case D3DSPR_CONSTINT:
1647 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1649 case D3DSPR_CONSTBOOL:
1650 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1656 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1663 /** operand output */
1665 * for better debugging traces it's done into opcode dump code
1666 * @see pshader_program_dump_opcode
1667 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1668 DWORD mask = param & D3DSP_DSTMOD_MASK;
1670 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1672 TRACE("_unhandled_modifier(0x%08lx)", mask);
1675 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1676 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1678 TRACE("_x%u", 1 << shift);
1682 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1684 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1685 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1686 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1687 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1690 /** operand input */
1691 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1692 DWORD swizzle_r = swizzle & 0x03;
1693 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1694 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1695 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1697 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1698 DWORD mask = param & D3DSP_SRCMOD_MASK;
1699 /*TRACE("_modifier(0x%08lx) ", mask);*/
1701 case D3DSPSM_NONE: break;
1702 case D3DSPSM_NEG: break;
1703 case D3DSPSM_BIAS: TRACE("_bias"); break;
1704 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1705 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1706 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1707 case D3DSPSM_COMP: break;
1708 case D3DSPSM_X2: TRACE("_x2"); break;
1709 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1710 case D3DSPSM_DZ: TRACE("_dz"); break;
1711 case D3DSPSM_DW: TRACE("_dw"); break;
1713 TRACE("_unknown(0x%08lx)", mask);
1718 * swizzle bits fields:
1721 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1722 if (swizzle_r == swizzle_g &&
1723 swizzle_r == swizzle_b &&
1724 swizzle_r == swizzle_a) {
1725 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1728 swizzle_reg_chars[swizzle_r],
1729 swizzle_reg_chars[swizzle_g],
1730 swizzle_reg_chars[swizzle_b],
1731 swizzle_reg_chars[swizzle_a]);
/*
 * Trace the usage name carried by a DCL token.
 *
 * This:  shader object (not referenced by the visible statements).
 * token: declaration token; the low 16 bits select the usage and bits 16..19
 *        give the usage index printed for position, normal, colour, texture
 *        and positionT.
 *
 * Colour usage prints "color" for index 0 and "specular<index - 1>"
 * otherwise. Unknown usages are reported with FIXME.
 */
1737 inline static void pshader_program_dump_decl_usage(IWineD3DPixelShaderImpl *This, DWORD token) {
1739 switch(token & 0xFFFF) {
1740 case D3DDECLUSAGE_POSITION:
1741 TRACE("%s%ld ", "position",(token & 0xF0000) >> 16);
1743 case D3DDECLUSAGE_BLENDINDICES:
1744 TRACE("%s ", "blend");
1746 case D3DDECLUSAGE_BLENDWEIGHT:
1747 TRACE("%s ", "weight");
1749 case D3DDECLUSAGE_NORMAL:
1750 TRACE("%s%ld ", "normal",(token & 0xF0000) >> 16);
1752 case D3DDECLUSAGE_PSIZE:
1753 TRACE("%s ", "psize");
1755 case D3DDECLUSAGE_COLOR:
1756 if((token & 0xF0000) >> 16 == 0) {
1757 TRACE("%s ", "color");
1759 TRACE("%s%ld ", "specular", ((token & 0xF0000) >> 16) - 1);
1762 case D3DDECLUSAGE_TEXCOORD:
1763 TRACE("%s%ld ", "texture", (token & 0xF0000) >> 16);
1765 case D3DDECLUSAGE_TANGENT:
1766 TRACE("%s ", "tangent");
1768 case D3DDECLUSAGE_BINORMAL:
1769 TRACE("%s ", "binormal");
1771 case D3DDECLUSAGE_TESSFACTOR:
1772 TRACE("%s ", "tessfactor");
1774 case D3DDECLUSAGE_POSITIONT:
1775 TRACE("%s%ld ", "positionT",(token & 0xF0000) >> 16);
1777 case D3DDECLUSAGE_FOG:
1778 TRACE("%s ", "fog");
1780 case D3DDECLUSAGE_DEPTH:
1781 TRACE("%s ", "depth");
1783 case D3DDECLUSAGE_SAMPLE:
1784 TRACE("%s ", "sample");
1787 FIXME("Unrecognised dcl %08lx", token & 0xFFFF);
/*
 * Attach a shader byte code stream to the pixel shader object.
 *
 * iface:     the pixel shader object.
 * pFunction: token stream terminated by D3DPS_END(), or NULL.
 *
 * The visible code does three things:
 *  1. walks the tokens, tracing a disassembly and recording the version in
 *     baseShader.version, to compute baseShader.functionLength in bytes;
 *  2. optionally generates the ARB program through
 *     IWineD3DPixelShaderImpl_GenerateProgramArbHW;
 *  3. stores a private copy of the token stream in baseShader.function
 *     (NULL when pFunction is NULL).
 *
 * The final trace announces WINED3D_OK; per the TODO below no failure codes
 * are produced yet.
 * NOTE(review): the token-pointer increments are not visible in this
 * listing; the loop is documented from the statements that are.
 */
1791 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1792 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1793 const DWORD* pToken = pFunction;
1794 const SHADER_OPCODE *curOpcode = NULL;
1797 TRACE("(%p) : Parsing programme\n", This);
1799 if (NULL != pToken) {
1800 while (D3DPS_END() != *pToken) {
1801 if (pshader_is_version_token(*pToken)) { /** version */
/* Stored as major * 10 + minor, e.g. 14 for ps_1_4. */
1802 This->baseShader.version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1803 TRACE("ps_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1808 if (pshader_is_comment_token(*pToken)) { /** comment */
1809 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1811 TRACE("//%s\n", (char*)pToken);
1812 pToken += comment_len;
/* Count the comment payload plus the comment token itself. */
1813 len += comment_len + 1;
1816 if (!This->baseShader.version) {
1817 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1819 curOpcode = pshader_program_get_opcode(This, *pToken);
1822 if (NULL == curOpcode) {
1824 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1825 while (*pToken & 0x80000000) {
1827 /* unknown current opcode ... */
1828 TRACE("unrecognized opcode: %08lx", *pToken);
1835 if (curOpcode->opcode == D3DSIO_DCL) {
1836 pshader_program_dump_decl_usage(This, *pToken);
1839 pshader_program_dump_ps_param(*pToken, 0);
1843 if (curOpcode->opcode == D3DSIO_DEF) {
/* DEF: the constant register number, followed by four tokens printed as floats. */
1844 TRACE("def c%lu = ", *pToken & 0xFF);
1847 TRACE("%f ,", *(float *)pToken);
1850 TRACE("%f ,", *(float *)pToken);
1853 TRACE("%f ,", *(float *)pToken);
1856 TRACE("%f", *(float *)pToken);
1860 TRACE("%s ", curOpcode->name);
1861 if (curOpcode->num_params > 0) {
/* First parameter is dumped as a destination, the rest as sources. */
1862 pshader_program_dump_ps_param(*pToken, 0);
1865 for (i = 1; i < curOpcode->num_params; ++i) {
1867 pshader_program_dump_ps_param(*pToken, 1);
/* len counts the tokens walked; the extra one presumably covers the end token -- confirm. */
1876 This->baseShader.functionLength = (len + 1) * sizeof(DWORD);
1878 This->baseShader.functionLength = 1; /* no Function defined use fixed function vertex processing */
1881 /* Generate HW shader if needed */
/* NOTE(review): this consults the vertex shader setting (vs_mode / VS_HW) in pixel shader code -- presumably a pixel shader mode was intended; confirm. */
1882 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1883 TRACE("(%p) : Generating hardware program\n", This);
1885 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1889 TRACE("(%p) : Copying the function\n", This);
1890 /* copy the function ... because it will certainly be released by application */
1891 if (NULL != pFunction) {
/* NOTE(review): the HeapAlloc result is passed to memcpy without a NULL check. */
1892 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1893 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1895 This->baseShader.function = NULL;
1898 /* TODO: Some proper return values for failures */
1899 TRACE("(%p) : Returning WINED3D_OK\n", This);
/*
 * Method table for the IWineD3DPixelShader implementation. Entries are
 * grouped by the interface they belong to, in the order given by the group
 * comments below.
 */
1903 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1905 /*** IUnknown methods ***/
1906 IWineD3DPixelShaderImpl_QueryInterface,
1907 IWineD3DPixelShaderImpl_AddRef,
1908 IWineD3DPixelShaderImpl_Release,
1909 /*** IWineD3DBase methods ***/
1910 IWineD3DPixelShaderImpl_GetParent,
1911 /*** IWineD3DBaseShader methods ***/
1912 IWineD3DPixelShaderImpl_SetFunction,
1913 /*** IWineD3DPixelShader methods ***/
1914 IWineD3DPixelShaderImpl_GetDevice,
1915 IWineD3DPixelShaderImpl_GetFunction