wined3d: Recognize the SM4 deriv_rtx opcode.
[wine] / dlls / wined3d / vertexdeclaration.c
1 /*
2  * vertex declaration implementation
3  *
4  * Copyright 2002-2005 Raphael Junqueira
5  * Copyright 2004 Jason Edmeades
6  * Copyright 2004 Christian Costa
7  * Copyright 2005 Oliver Stieber
8  * Copyright 2009 Henri Verbeet for CodeWeavers
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24
25 #include "config.h"
26 #include "wined3d_private.h"
27
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_decl);
29
30 static void dump_wined3dvertexelement(const WINED3DVERTEXELEMENT *element) {
31     TRACE("     format: %s (%#x)\n", debug_d3dformat(element->format), element->format);
32     TRACE(" input_slot: %u\n", element->input_slot);
33     TRACE("     offset: %u\n", element->offset);
34     TRACE("output_slot: %u\n", element->output_slot);
35     TRACE("     method: %s (%#x)\n", debug_d3ddeclmethod(element->method), element->method);
36     TRACE("      usage: %s (%#x)\n", debug_d3ddeclusage(element->usage), element->usage);
37     TRACE("  usage_idx: %u\n", element->usage_idx);
38 }
39
40 ULONG CDECL wined3d_vertex_declaration_incref(struct wined3d_vertex_declaration *declaration)
41 {
42     ULONG refcount = InterlockedIncrement(&declaration->ref);
43
44     TRACE("%p increasing refcount to %u.\n", declaration, refcount);
45
46     return refcount;
47 }
48
49 ULONG CDECL wined3d_vertex_declaration_decref(struct wined3d_vertex_declaration *declaration)
50 {
51     ULONG refcount = InterlockedDecrement(&declaration->ref);
52
53     TRACE("%p decreasing refcount to %u.\n", declaration, refcount);
54
55     if (!refcount)
56     {
57         HeapFree(GetProcessHeap(), 0, declaration->elements);
58         declaration->parent_ops->wined3d_object_destroyed(declaration->parent);
59         HeapFree(GetProcessHeap(), 0, declaration);
60     }
61
62     return refcount;
63 }
64
65 void * CDECL wined3d_vertex_declaration_get_parent(const struct wined3d_vertex_declaration *declaration)
66 {
67     TRACE("declaration %p.\n", declaration);
68
69     return declaration->parent;
70 }
71
72 static BOOL declaration_element_valid_ffp(const WINED3DVERTEXELEMENT *element)
73 {
74     switch(element->usage)
75     {
76         case WINED3DDECLUSAGE_POSITION:
77         case WINED3DDECLUSAGE_POSITIONT:
78             switch(element->format)
79             {
80                 case WINED3DFMT_R32G32_FLOAT:
81                 case WINED3DFMT_R32G32B32_FLOAT:
82                 case WINED3DFMT_R32G32B32A32_FLOAT:
83                 case WINED3DFMT_R16G16_SINT:
84                 case WINED3DFMT_R16G16B16A16_SINT:
85                 case WINED3DFMT_R16G16_FLOAT:
86                 case WINED3DFMT_R16G16B16A16_FLOAT:
87                     return TRUE;
88                 default:
89                     return FALSE;
90             }
91
92         case WINED3DDECLUSAGE_BLENDWEIGHT:
93             switch(element->format)
94             {
95                 case WINED3DFMT_R32_FLOAT:
96                 case WINED3DFMT_R32G32_FLOAT:
97                 case WINED3DFMT_R32G32B32_FLOAT:
98                 case WINED3DFMT_R32G32B32A32_FLOAT:
99                 case WINED3DFMT_B8G8R8A8_UNORM:
100                 case WINED3DFMT_R8G8B8A8_UINT:
101                 case WINED3DFMT_R16G16_SINT:
102                 case WINED3DFMT_R16G16B16A16_SINT:
103                 case WINED3DFMT_R16G16_FLOAT:
104                 case WINED3DFMT_R16G16B16A16_FLOAT:
105                     return TRUE;
106                 default:
107                     return FALSE;
108             }
109
110         case WINED3DDECLUSAGE_NORMAL:
111             switch(element->format)
112             {
113                 case WINED3DFMT_R32G32B32_FLOAT:
114                 case WINED3DFMT_R32G32B32A32_FLOAT:
115                 case WINED3DFMT_R16G16B16A16_SINT:
116                 case WINED3DFMT_R16G16B16A16_FLOAT:
117                     return TRUE;
118                 default:
119                     return FALSE;
120             }
121
122         case WINED3DDECLUSAGE_TEXCOORD:
123             switch(element->format)
124             {
125                 case WINED3DFMT_R32_FLOAT:
126                 case WINED3DFMT_R32G32_FLOAT:
127                 case WINED3DFMT_R32G32B32_FLOAT:
128                 case WINED3DFMT_R32G32B32A32_FLOAT:
129                 case WINED3DFMT_R16G16_SINT:
130                 case WINED3DFMT_R16G16B16A16_SINT:
131                 case WINED3DFMT_R16G16_FLOAT:
132                 case WINED3DFMT_R16G16B16A16_FLOAT:
133                     return TRUE;
134                 default:
135                     return FALSE;
136             }
137
138         case WINED3DDECLUSAGE_COLOR:
139             switch(element->format)
140             {
141                 case WINED3DFMT_R32G32B32_FLOAT:
142                 case WINED3DFMT_R32G32B32A32_FLOAT:
143                 case WINED3DFMT_B8G8R8A8_UNORM:
144                 case WINED3DFMT_R8G8B8A8_UINT:
145                 case WINED3DFMT_R16G16B16A16_SINT:
146                 case WINED3DFMT_R8G8B8A8_UNORM:
147                 case WINED3DFMT_R16G16B16A16_SNORM:
148                 case WINED3DFMT_R16G16B16A16_UNORM:
149                 case WINED3DFMT_R16G16B16A16_FLOAT:
150                     return TRUE;
151                 default:
152                     return FALSE;
153             }
154
155         default:
156             return FALSE;
157     }
158 }
159
160 static HRESULT vertexdeclaration_init(struct wined3d_vertex_declaration *declaration,
161         struct wined3d_device *device, const WINED3DVERTEXELEMENT *elements, UINT element_count,
162         void *parent, const struct wined3d_parent_ops *parent_ops)
163 {
164     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
165     WORD preloaded = 0; /* MAX_STREAMS, 16 */
166     unsigned int i;
167
168     if (TRACE_ON(d3d_decl))
169     {
170         for (i = 0; i < element_count; ++i)
171         {
172             dump_wined3dvertexelement(elements + i);
173         }
174     }
175
176     declaration->ref = 1;
177     declaration->parent = parent;
178     declaration->parent_ops = parent_ops;
179     declaration->device = device;
180     declaration->elements = HeapAlloc(GetProcessHeap(), 0, sizeof(*declaration->elements) * element_count);
181     if (!declaration->elements)
182     {
183         ERR("Failed to allocate elements memory.\n");
184         return E_OUTOFMEMORY;
185     }
186     declaration->element_count = element_count;
187
188     /* Do some static analysis on the elements to make reading the
189      * declaration more comfortable for the drawing code. */
190     for (i = 0; i < element_count; ++i)
191     {
192         struct wined3d_vertex_declaration_element *e = &declaration->elements[i];
193
194         e->format = wined3d_get_format(gl_info, elements[i].format);
195         e->ffp_valid = declaration_element_valid_ffp(&elements[i]);
196         e->input_slot = elements[i].input_slot;
197         e->offset = elements[i].offset;
198         e->output_slot = elements[i].output_slot;
199         e->method = elements[i].method;
200         e->usage = elements[i].usage;
201         e->usage_idx = elements[i].usage_idx;
202
203         if (e->usage == WINED3DDECLUSAGE_POSITIONT) declaration->position_transformed = TRUE;
204
205         /* Find the streams used in the declaration. The vertex buffers have
206          * to be loaded when drawing, but filter tesselation pseudo streams. */
207         if (e->input_slot >= MAX_STREAMS) continue;
208
209         if (!e->format->gl_vtx_format)
210         {
211             FIXME("The application tries to use an unsupported format (%s), returning E_FAIL.\n",
212                     debug_d3dformat(elements[i].format));
213             HeapFree(GetProcessHeap(), 0, declaration->elements);
214             return E_FAIL;
215         }
216
217         if (e->offset & 0x3)
218         {
219             WARN("Declaration element %u is not 4 byte aligned(%u), returning E_FAIL.\n", i, e->offset);
220             HeapFree(GetProcessHeap(), 0, declaration->elements);
221             return E_FAIL;
222         }
223
224         if (!(preloaded & (1 << e->input_slot)))
225         {
226             declaration->streams[declaration->num_streams] = e->input_slot;
227             ++declaration->num_streams;
228             preloaded |= 1 << e->input_slot;
229         }
230
231         if (elements[i].format == WINED3DFMT_R16G16_FLOAT || elements[i].format == WINED3DFMT_R16G16B16A16_FLOAT)
232         {
233             if (!gl_info->supported[ARB_HALF_FLOAT_VERTEX]) declaration->half_float_conv_needed = TRUE;
234         }
235     }
236
237     return WINED3D_OK;
238 }
239
240 HRESULT CDECL wined3d_vertex_declaration_create(struct wined3d_device *device,
241         const WINED3DVERTEXELEMENT *elements, UINT element_count, void *parent,
242         const struct wined3d_parent_ops *parent_ops, struct wined3d_vertex_declaration **declaration)
243 {
244     struct wined3d_vertex_declaration *object;
245     HRESULT hr;
246
247     TRACE("device %p, elements %p, element_count %u, parent %p, parent_ops %p, declaration %p.\n",
248             device, elements, element_count, parent, parent_ops, declaration);
249
250     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
251     if(!object)
252     {
253         ERR("Failed to allocate vertex declaration memory.\n");
254         return E_OUTOFMEMORY;
255     }
256
257     hr = vertexdeclaration_init(object, device, elements, element_count, parent, parent_ops);
258     if (FAILED(hr))
259     {
260         WARN("Failed to initialize vertex declaration, hr %#x.\n", hr);
261         HeapFree(GetProcessHeap(), 0, object);
262         return hr;
263     }
264
265     TRACE("Created vertex declaration %p.\n", object);
266     *declaration = object;
267
268     return WINED3D_OK;
269 }
270
271 struct wined3d_fvf_convert_state
272 {
273     const struct wined3d_gl_info *gl_info;
274     WINED3DVERTEXELEMENT *elements;
275     UINT offset;
276     UINT idx;
277 };
278
279 static void append_decl_element(struct wined3d_fvf_convert_state *state,
280         enum wined3d_format_id format_id, WINED3DDECLUSAGE usage, UINT usage_idx)
281 {
282     WINED3DVERTEXELEMENT *elements = state->elements;
283     const struct wined3d_format *format;
284     UINT offset = state->offset;
285     UINT idx = state->idx;
286
287     elements[idx].format = format_id;
288     elements[idx].input_slot = 0;
289     elements[idx].offset = offset;
290     elements[idx].output_slot = 0;
291     elements[idx].method = WINED3DDECLMETHOD_DEFAULT;
292     elements[idx].usage = usage;
293     elements[idx].usage_idx = usage_idx;
294
295     format = wined3d_get_format(state->gl_info, format_id);
296     state->offset += format->component_count * format->component_size;
297     ++state->idx;
298 }
299
300 static unsigned int convert_fvf_to_declaration(const struct wined3d_gl_info *gl_info,
301         DWORD fvf, WINED3DVERTEXELEMENT **elements)
302 {
303     BOOL has_pos = !!(fvf & WINED3DFVF_POSITION_MASK);
304     BOOL has_blend = (fvf & WINED3DFVF_XYZB5) > WINED3DFVF_XYZRHW;
305     BOOL has_blend_idx = has_blend &&
306        (((fvf & WINED3DFVF_XYZB5) == WINED3DFVF_XYZB5) ||
307         (fvf & WINED3DFVF_LASTBETA_D3DCOLOR) ||
308         (fvf & WINED3DFVF_LASTBETA_UBYTE4));
309     BOOL has_normal = !!(fvf & WINED3DFVF_NORMAL);
310     BOOL has_psize = !!(fvf & WINED3DFVF_PSIZE);
311     BOOL has_diffuse = !!(fvf & WINED3DFVF_DIFFUSE);
312     BOOL has_specular = !!(fvf & WINED3DFVF_SPECULAR);
313
314     DWORD num_textures = (fvf & WINED3DFVF_TEXCOUNT_MASK) >> WINED3DFVF_TEXCOUNT_SHIFT;
315     DWORD texcoords = (fvf & 0xFFFF0000) >> 16;
316     struct wined3d_fvf_convert_state state;
317     unsigned int size;
318     unsigned int idx;
319     DWORD num_blends = 1 + (((fvf & WINED3DFVF_XYZB5) - WINED3DFVF_XYZB1) >> 1);
320     if (has_blend_idx) num_blends--;
321
322     /* Compute declaration size */
323     size = has_pos + (has_blend && num_blends > 0) + has_blend_idx + has_normal +
324            has_psize + has_diffuse + has_specular + num_textures;
325
326     state.gl_info = gl_info;
327     state.elements = HeapAlloc(GetProcessHeap(), 0, size * sizeof(*state.elements));
328     if (!state.elements) return ~0U;
329     state.offset = 0;
330     state.idx = 0;
331
332     if (has_pos)
333     {
334         if (!has_blend && (fvf & WINED3DFVF_XYZRHW))
335             append_decl_element(&state, WINED3DFMT_R32G32B32A32_FLOAT, WINED3DDECLUSAGE_POSITIONT, 0);
336         else if ((fvf & WINED3DFVF_XYZW) == WINED3DFVF_XYZW)
337             append_decl_element(&state, WINED3DFMT_R32G32B32A32_FLOAT, WINED3DDECLUSAGE_POSITION, 0);
338         else
339             append_decl_element(&state, WINED3DFMT_R32G32B32_FLOAT, WINED3DDECLUSAGE_POSITION, 0);
340     }
341
342     if (has_blend && (num_blends > 0))
343     {
344         if ((fvf & WINED3DFVF_XYZB5) == WINED3DFVF_XYZB2 && (fvf & WINED3DFVF_LASTBETA_D3DCOLOR))
345             append_decl_element(&state, WINED3DFMT_B8G8R8A8_UNORM, WINED3DDECLUSAGE_BLENDWEIGHT, 0);
346         else
347         {
348             switch (num_blends)
349             {
350                 case 1:
351                     append_decl_element(&state, WINED3DFMT_R32_FLOAT, WINED3DDECLUSAGE_BLENDWEIGHT, 0);
352                     break;
353                 case 2:
354                     append_decl_element(&state, WINED3DFMT_R32G32_FLOAT, WINED3DDECLUSAGE_BLENDWEIGHT, 0);
355                     break;
356                 case 3:
357                     append_decl_element(&state, WINED3DFMT_R32G32B32_FLOAT, WINED3DDECLUSAGE_BLENDWEIGHT, 0);
358                     break;
359                 case 4:
360                     append_decl_element(&state, WINED3DFMT_R32G32B32A32_FLOAT, WINED3DDECLUSAGE_BLENDWEIGHT, 0);
361                     break;
362                 default:
363                     ERR("Unexpected amount of blend values: %u\n", num_blends);
364             }
365         }
366     }
367
368     if (has_blend_idx)
369     {
370         if ((fvf & WINED3DFVF_LASTBETA_UBYTE4)
371                 || ((fvf & WINED3DFVF_XYZB5) == WINED3DFVF_XYZB2 && (fvf & WINED3DFVF_LASTBETA_D3DCOLOR)))
372             append_decl_element(&state, WINED3DFMT_R8G8B8A8_UINT, WINED3DDECLUSAGE_BLENDINDICES, 0);
373         else if (fvf & WINED3DFVF_LASTBETA_D3DCOLOR)
374             append_decl_element(&state, WINED3DFMT_B8G8R8A8_UNORM, WINED3DDECLUSAGE_BLENDINDICES, 0);
375         else
376             append_decl_element(&state, WINED3DFMT_R32_FLOAT, WINED3DDECLUSAGE_BLENDINDICES, 0);
377     }
378
379     if (has_normal) append_decl_element(&state, WINED3DFMT_R32G32B32_FLOAT, WINED3DDECLUSAGE_NORMAL, 0);
380     if (has_psize) append_decl_element(&state, WINED3DFMT_R32_FLOAT, WINED3DDECLUSAGE_PSIZE, 0);
381     if (has_diffuse) append_decl_element(&state, WINED3DFMT_B8G8R8A8_UNORM, WINED3DDECLUSAGE_COLOR, 0);
382     if (has_specular) append_decl_element(&state, WINED3DFMT_B8G8R8A8_UNORM, WINED3DDECLUSAGE_COLOR, 1);
383
384     for (idx = 0; idx < num_textures; ++idx)
385     {
386         switch ((texcoords >> (idx * 2)) & 0x03)
387         {
388             case WINED3DFVF_TEXTUREFORMAT1:
389                 append_decl_element(&state, WINED3DFMT_R32_FLOAT, WINED3DDECLUSAGE_TEXCOORD, idx);
390                 break;
391             case WINED3DFVF_TEXTUREFORMAT2:
392                 append_decl_element(&state, WINED3DFMT_R32G32_FLOAT, WINED3DDECLUSAGE_TEXCOORD, idx);
393                 break;
394             case WINED3DFVF_TEXTUREFORMAT3:
395                 append_decl_element(&state, WINED3DFMT_R32G32B32_FLOAT, WINED3DDECLUSAGE_TEXCOORD, idx);
396                 break;
397             case WINED3DFVF_TEXTUREFORMAT4:
398                 append_decl_element(&state, WINED3DFMT_R32G32B32A32_FLOAT, WINED3DDECLUSAGE_TEXCOORD, idx);
399                 break;
400         }
401     }
402
403     *elements = state.elements;
404     return size;
405 }
406
407 HRESULT CDECL wined3d_vertex_declaration_create_from_fvf(struct wined3d_device *device,
408         DWORD fvf, void *parent, const struct wined3d_parent_ops *parent_ops,
409         struct wined3d_vertex_declaration **declaration)
410 {
411     WINED3DVERTEXELEMENT *elements;
412     unsigned int size;
413     DWORD hr;
414
415     TRACE("device %p, fvf %#x, parent %p, parent_ops %p, declaration %p.\n",
416             device, fvf, parent, parent_ops, declaration);
417
418     size = convert_fvf_to_declaration(&device->adapter->gl_info, fvf, &elements);
419     if (size == ~0U) return E_OUTOFMEMORY;
420
421     hr = wined3d_vertex_declaration_create(device, elements, size, parent, parent_ops, declaration);
422     HeapFree(GetProcessHeap(), 0, elements);
423     return hr;
424 }