d3d10core: Fixup HRESULT in a bunch of error cases.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     TRACE("surface %p.\n", surface);
46
47     if (surface->texture_name || (surface->flags & SFLAG_PBO)
48              || surface->rb_multisample || surface->rb_resolved
49              || !list_empty(&surface->renderbuffers))
50     {
51         struct wined3d_renderbuffer_entry *entry, *entry2;
52         const struct wined3d_gl_info *gl_info;
53         struct wined3d_context *context;
54
55         context = context_acquire(surface->resource.device, NULL);
56         gl_info = context->gl_info;
57
58         ENTER_GL();
59
60         if (surface->texture_name)
61         {
62             TRACE("Deleting texture %u.\n", surface->texture_name);
63             glDeleteTextures(1, &surface->texture_name);
64         }
65
66         if (surface->flags & SFLAG_PBO)
67         {
68             TRACE("Deleting PBO %u.\n", surface->pbo);
69             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
70         }
71
72         if (surface->rb_multisample)
73         {
74             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
75             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
76         }
77
78         if (surface->rb_resolved)
79         {
80             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
81             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
82         }
83
84         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
85         {
86             TRACE("Deleting renderbuffer %u.\n", entry->id);
87             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
88             HeapFree(GetProcessHeap(), 0, entry);
89         }
90
91         LEAVE_GL();
92
93         context_release(context);
94     }
95
96     if (surface->flags & SFLAG_DIBSECTION)
97     {
98         /* Release the DC. */
99         SelectObject(surface->hDC, surface->dib.holdbitmap);
100         DeleteDC(surface->hDC);
101         /* Release the DIB section. */
102         DeleteObject(surface->dib.DIBsection);
103         surface->dib.bitmap_data = NULL;
104         surface->resource.allocatedMemory = NULL;
105     }
106
107     if (surface->flags & SFLAG_USERPTR)
108         wined3d_surface_set_mem(surface, NULL);
109     if (surface->overlay_dest)
110         list_remove(&surface->overlay_entry);
111
112     HeapFree(GetProcessHeap(), 0, surface->palette9);
113
114     resource_cleanup(&surface->resource);
115 }
116
117 void surface_update_draw_binding(struct wined3d_surface *surface)
118 {
119     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
120         surface->draw_binding = SFLAG_INDRAWABLE;
121     else if (surface->resource.multisample_type)
122         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
123     else
124         surface->draw_binding = SFLAG_INTEXTURE;
125 }
126
127 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
128 {
129     TRACE("surface %p, container %p.\n", surface, container);
130
131     if (!container && type != WINED3D_CONTAINER_NONE)
132         ERR("Setting NULL container of type %#x.\n", type);
133
134     if (type == WINED3D_CONTAINER_SWAPCHAIN)
135     {
136         surface->get_drawable_size = get_drawable_size_swapchain;
137     }
138     else
139     {
140         switch (wined3d_settings.offscreen_rendering_mode)
141         {
142             case ORM_FBO:
143                 surface->get_drawable_size = get_drawable_size_fbo;
144                 break;
145
146             case ORM_BACKBUFFER:
147                 surface->get_drawable_size = get_drawable_size_backbuffer;
148                 break;
149
150             default:
151                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
152                 return;
153         }
154     }
155
156     surface->container.type = type;
157     surface->container.u.base = container;
158     surface_update_draw_binding(surface);
159 }
160
161 struct blt_info
162 {
163     GLenum binding;
164     GLenum bind_target;
165     enum tex_types tex_type;
166     GLfloat coords[4][3];
167 };
168
/* Rectangle edges (left, top, right, bottom) stored as floats. */
struct float_rect
{
    float l, t, r, b;
};
176
177 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
178 {
179     f->l = ((r->left * 2.0f) / w) - 1.0f;
180     f->t = ((r->top * 2.0f) / h) - 1.0f;
181     f->r = ((r->right * 2.0f) / w) - 1.0f;
182     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
183 }
184
185 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
186 {
187     GLfloat (*coords)[3] = info->coords;
188     struct float_rect f;
189
190     switch (target)
191     {
192         default:
193             FIXME("Unsupported texture target %#x\n", target);
194             /* Fall back to GL_TEXTURE_2D */
195         case GL_TEXTURE_2D:
196             info->binding = GL_TEXTURE_BINDING_2D;
197             info->bind_target = GL_TEXTURE_2D;
198             info->tex_type = tex_2d;
199             coords[0][0] = (float)rect->left / w;
200             coords[0][1] = (float)rect->top / h;
201             coords[0][2] = 0.0f;
202
203             coords[1][0] = (float)rect->right / w;
204             coords[1][1] = (float)rect->top / h;
205             coords[1][2] = 0.0f;
206
207             coords[2][0] = (float)rect->left / w;
208             coords[2][1] = (float)rect->bottom / h;
209             coords[2][2] = 0.0f;
210
211             coords[3][0] = (float)rect->right / w;
212             coords[3][1] = (float)rect->bottom / h;
213             coords[3][2] = 0.0f;
214             break;
215
216         case GL_TEXTURE_RECTANGLE_ARB:
217             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
218             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
219             info->tex_type = tex_rect;
220             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
221             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
222             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
223             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
224             break;
225
226         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
227             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
228             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
229             info->tex_type = tex_cube;
230             cube_coords_float(rect, w, h, &f);
231
232             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
233             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
234             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
235             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
236             break;
237
238         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
239             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
240             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
241             info->tex_type = tex_cube;
242             cube_coords_float(rect, w, h, &f);
243
244             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
245             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
246             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
247             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
248             break;
249
250         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
251             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
252             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
253             info->tex_type = tex_cube;
254             cube_coords_float(rect, w, h, &f);
255
256             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
257             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
258             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
259             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
260             break;
261
262         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
263             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
264             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
265             info->tex_type = tex_cube;
266             cube_coords_float(rect, w, h, &f);
267
268             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
269             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
270             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
271             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
272             break;
273
274         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
275             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
276             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
277             info->tex_type = tex_cube;
278             cube_coords_float(rect, w, h, &f);
279
280             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
281             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
282             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
283             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
284             break;
285
286         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
287             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
288             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
289             info->tex_type = tex_cube;
290             cube_coords_float(rect, w, h, &f);
291
292             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
293             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
294             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
295             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
296             break;
297     }
298 }
299
300 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
301 {
302     if (rect_in)
303         *rect_out = *rect_in;
304     else
305     {
306         rect_out->left = 0;
307         rect_out->top = 0;
308         rect_out->right = surface->resource.width;
309         rect_out->bottom = surface->resource.height;
310     }
311 }
312
313 /* GL locking and context activation is done by the caller */
314 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
315         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
316 {
317     struct blt_info info;
318
319     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
320
321     glEnable(info.bind_target);
322     checkGLcall("glEnable(bind_target)");
323
324     context_bind_texture(context, info.bind_target, src_surface->texture_name);
325
326     /* Filtering for StretchRect */
327     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
328             wined3d_gl_mag_filter(magLookup, Filter));
329     checkGLcall("glTexParameteri");
330     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
331             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
334     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
335     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
336         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
337     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
338     checkGLcall("glTexEnvi");
339
340     /* Draw a quad */
341     glBegin(GL_TRIANGLE_STRIP);
342     glTexCoord3fv(info.coords[0]);
343     glVertex2i(dst_rect->left, dst_rect->top);
344
345     glTexCoord3fv(info.coords[1]);
346     glVertex2i(dst_rect->right, dst_rect->top);
347
348     glTexCoord3fv(info.coords[2]);
349     glVertex2i(dst_rect->left, dst_rect->bottom);
350
351     glTexCoord3fv(info.coords[3]);
352     glVertex2i(dst_rect->right, dst_rect->bottom);
353     glEnd();
354
355     /* Unbind the texture */
356     context_bind_texture(context, info.bind_target, 0);
357
358     /* We changed the filtering settings on the texture. Inform the
359      * container about this to get the filters reset properly next draw. */
360     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
361     {
362         struct wined3d_texture *texture = src_surface->container.u.texture;
363         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
364         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
365         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
366         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
367     }
368 }
369
370 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
371 {
372     const struct wined3d_format *format = surface->resource.format;
373     SYSTEM_INFO sysInfo;
374     BITMAPINFO *b_info;
375     int extraline = 0;
376     DWORD *masks;
377     UINT usage;
378     HDC dc;
379
380     TRACE("surface %p.\n", surface);
381
382     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
383     {
384         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
385         return WINED3DERR_INVALIDCALL;
386     }
387
388     switch (format->byte_count)
389     {
390         case 2:
391         case 4:
392             /* Allocate extra space to store the RGB bit masks. */
393             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
394             break;
395
396         case 3:
397             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
398             break;
399
400         default:
401             /* Allocate extra space for a palette. */
402             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
403                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
404             break;
405     }
406
407     if (!b_info)
408         return E_OUTOFMEMORY;
409
410     /* Some applications access the surface in via DWORDs, and do not take
411      * the necessary care at the end of the surface. So we need at least
412      * 4 extra bytes at the end of the surface. Check against the page size,
413      * if the last page used for the surface has at least 4 spare bytes we're
414      * safe, otherwise add an extra line to the DIB section. */
415     GetSystemInfo(&sysInfo);
416     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
417     {
418         extraline = 1;
419         TRACE("Adding an extra line to the DIB section.\n");
420     }
421
422     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
423     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
424     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
425     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
426     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
427             * wined3d_surface_get_pitch(surface);
428     b_info->bmiHeader.biPlanes = 1;
429     b_info->bmiHeader.biBitCount = format->byte_count * 8;
430
431     b_info->bmiHeader.biXPelsPerMeter = 0;
432     b_info->bmiHeader.biYPelsPerMeter = 0;
433     b_info->bmiHeader.biClrUsed = 0;
434     b_info->bmiHeader.biClrImportant = 0;
435
436     /* Get the bit masks */
437     masks = (DWORD *)b_info->bmiColors;
438     switch (surface->resource.format->id)
439     {
440         case WINED3DFMT_B8G8R8_UNORM:
441             usage = DIB_RGB_COLORS;
442             b_info->bmiHeader.biCompression = BI_RGB;
443             break;
444
445         case WINED3DFMT_B5G5R5X1_UNORM:
446         case WINED3DFMT_B5G5R5A1_UNORM:
447         case WINED3DFMT_B4G4R4A4_UNORM:
448         case WINED3DFMT_B4G4R4X4_UNORM:
449         case WINED3DFMT_B2G3R3_UNORM:
450         case WINED3DFMT_B2G3R3A8_UNORM:
451         case WINED3DFMT_R10G10B10A2_UNORM:
452         case WINED3DFMT_R8G8B8A8_UNORM:
453         case WINED3DFMT_R8G8B8X8_UNORM:
454         case WINED3DFMT_B10G10R10A2_UNORM:
455         case WINED3DFMT_B5G6R5_UNORM:
456         case WINED3DFMT_R16G16B16A16_UNORM:
457             usage = 0;
458             b_info->bmiHeader.biCompression = BI_BITFIELDS;
459             masks[0] = format->red_mask;
460             masks[1] = format->green_mask;
461             masks[2] = format->blue_mask;
462             break;
463
464         default:
465             /* Don't know palette */
466             b_info->bmiHeader.biCompression = BI_RGB;
467             usage = 0;
468             break;
469     }
470
471     if (!(dc = GetDC(0)))
472     {
473         HeapFree(GetProcessHeap(), 0, b_info);
474         return HRESULT_FROM_WIN32(GetLastError());
475     }
476
477     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
478             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
479             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
480     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
481     ReleaseDC(0, dc);
482
483     if (!surface->dib.DIBsection)
484     {
485         ERR("Failed to create DIB section.\n");
486         HeapFree(GetProcessHeap(), 0, b_info);
487         return HRESULT_FROM_WIN32(GetLastError());
488     }
489
490     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
491     /* Copy the existing surface to the dib section. */
492     if (surface->resource.allocatedMemory)
493     {
494         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
495                 surface->resource.height * wined3d_surface_get_pitch(surface));
496     }
497     else
498     {
499         /* This is to make maps read the GL texture although memory is allocated. */
500         surface->flags &= ~SFLAG_INSYSMEM;
501     }
502     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
503
504     HeapFree(GetProcessHeap(), 0, b_info);
505
506     /* Now allocate a DC. */
507     surface->hDC = CreateCompatibleDC(0);
508     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
509     TRACE("Using wined3d palette %p.\n", surface->palette);
510     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
511
512     surface->flags |= SFLAG_DIBSECTION;
513
514     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
515     surface->resource.heapMemory = NULL;
516
517     return WINED3D_OK;
518 }
519
520 static void surface_prepare_system_memory(struct wined3d_surface *surface)
521 {
522     struct wined3d_device *device = surface->resource.device;
523     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
524
525     TRACE("surface %p.\n", surface);
526
527     /* Performance optimization: Count how often a surface is locked, if it is
528      * locked regularly do not throw away the system memory copy. This avoids
529      * the need to download the surface from OpenGL all the time. The surface
530      * is still downloaded if the OpenGL texture is changed. */
531     if (!(surface->flags & SFLAG_DYNLOCK))
532     {
533         if (++surface->lockCount > MAXLOCKCOUNT)
534         {
535             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
536             surface->flags |= SFLAG_DYNLOCK;
537         }
538     }
539
540     /* Create a PBO for dynamically locked surfaces but don't do it for
541      * converted or NPOT surfaces. Also don't create a PBO for systemmem
542      * surfaces. */
543     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
544             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
545             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
546     {
547         struct wined3d_context *context;
548         GLenum error;
549
550         context = context_acquire(device, NULL);
551         ENTER_GL();
552
553         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
554         error = glGetError();
555         if (!surface->pbo || error != GL_NO_ERROR)
556             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
557
558         TRACE("Binding PBO %u.\n", surface->pbo);
559
560         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
561         checkGLcall("glBindBufferARB");
562
563         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
564                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
565         checkGLcall("glBufferDataARB");
566
567         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
568         checkGLcall("glBindBufferARB");
569
570         /* We don't need the system memory anymore and we can't even use it for PBOs. */
571         if (!(surface->flags & SFLAG_CLIENT))
572         {
573             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
574             surface->resource.heapMemory = NULL;
575         }
576         surface->resource.allocatedMemory = NULL;
577         surface->flags |= SFLAG_PBO;
578         LEAVE_GL();
579         context_release(context);
580     }
581     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
582     {
583         /* Whatever surface we have, make sure that there is memory allocated
584          * for the downloaded copy, or a PBO to map. */
585         if (!surface->resource.heapMemory)
586             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
587
588         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
589                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
590
591         if (surface->flags & SFLAG_INSYSMEM)
592             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
593     }
594 }
595
596 static void surface_evict_sysmem(struct wined3d_surface *surface)
597 {
598     if (surface->flags & SFLAG_DONOTFREE)
599         return;
600
601     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
602     surface->resource.allocatedMemory = NULL;
603     surface->resource.heapMemory = NULL;
604     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
605 }
606
607 /* Context activation is done by the caller. */
608 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
609         struct wined3d_context *context, BOOL srgb)
610 {
611     struct wined3d_device *device = surface->resource.device;
612     DWORD active_sampler;
613
614     /* We don't need a specific texture unit, but after binding the texture
615      * the current unit is dirty. Read the unit back instead of switching to
616      * 0, this avoids messing around with the state manager's GL states. The
617      * current texture unit should always be a valid one.
618      *
619      * To be more specific, this is tricky because we can implicitly be
620      * called from sampler() in state.c. This means we can't touch anything
621      * other than whatever happens to be the currently active texture, or we
622      * would risk marking already applied sampler states dirty again. */
623     active_sampler = device->rev_tex_unit_map[context->active_texture];
624
625     if (active_sampler != WINED3D_UNMAPPED_STAGE)
626         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
627     surface_bind(surface, context, srgb);
628 }
629
630 static void surface_force_reload(struct wined3d_surface *surface)
631 {
632     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
633 }
634
635 static void surface_release_client_storage(struct wined3d_surface *surface)
636 {
637     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
638
639     ENTER_GL();
640     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
641     if (surface->texture_name)
642     {
643         surface_bind_and_dirtify(surface, context, FALSE);
644         glTexImage2D(surface->texture_target, surface->texture_level,
645                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
646     }
647     if (surface->texture_name_srgb)
648     {
649         surface_bind_and_dirtify(surface, context, TRUE);
650         glTexImage2D(surface->texture_target, surface->texture_level,
651                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
652     }
653     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
654     LEAVE_GL();
655
656     context_release(context);
657
658     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
659     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
660     surface_force_reload(surface);
661 }
662
663 static HRESULT surface_private_setup(struct wined3d_surface *surface)
664 {
665     /* TODO: Check against the maximum texture sizes supported by the video card. */
666     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
667     unsigned int pow2Width, pow2Height;
668
669     TRACE("surface %p.\n", surface);
670
671     surface->texture_name = 0;
672     surface->texture_target = GL_TEXTURE_2D;
673
674     /* Non-power2 support */
675     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
676     {
677         pow2Width = surface->resource.width;
678         pow2Height = surface->resource.height;
679     }
680     else
681     {
682         /* Find the nearest pow2 match */
683         pow2Width = pow2Height = 1;
684         while (pow2Width < surface->resource.width)
685             pow2Width <<= 1;
686         while (pow2Height < surface->resource.height)
687             pow2Height <<= 1;
688     }
689     surface->pow2Width = pow2Width;
690     surface->pow2Height = pow2Height;
691
692     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
693     {
694         /* TODO: Add support for non power two compressed textures. */
695         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
696         {
697             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
698                   surface, surface->resource.width, surface->resource.height);
699             return WINED3DERR_NOTAVAILABLE;
700         }
701     }
702
703     if (pow2Width != surface->resource.width
704             || pow2Height != surface->resource.height)
705     {
706         surface->flags |= SFLAG_NONPOW2;
707     }
708
709     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
710             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
711     {
712         /* One of three options:
713          * 1: Do the same as we do with NPOT and scale the texture, (any
714          *    texture ops would require the texture to be scaled which is
715          *    potentially slow)
716          * 2: Set the texture to the maximum size (bad idea).
717          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
718          * 4: Create the surface, but allow it to be used only for DirectDraw
719          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
720          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
721          *    the render target. */
722         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
723         {
724             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
725             return WINED3DERR_NOTAVAILABLE;
726         }
727
728         /* We should never use this surface in combination with OpenGL! */
729         TRACE("Creating an oversized surface: %ux%u.\n",
730                 surface->pow2Width, surface->pow2Height);
731     }
732     else
733     {
734         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
735          * and EXT_PALETTED_TEXTURE is used in combination with texture
736          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
737          * EXT_PALETTED_TEXTURE doesn't work in combination with
738          * ARB_TEXTURE_RECTANGLE. */
739         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
740                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
741                 && gl_info->supported[EXT_PALETTED_TEXTURE]
742                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
743         {
744             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
745             surface->pow2Width = surface->resource.width;
746             surface->pow2Height = surface->resource.height;
747             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
748         }
749     }
750
751     switch (wined3d_settings.offscreen_rendering_mode)
752     {
753         case ORM_FBO:
754             surface->get_drawable_size = get_drawable_size_fbo;
755             break;
756
757         case ORM_BACKBUFFER:
758             surface->get_drawable_size = get_drawable_size_backbuffer;
759             break;
760
761         default:
762             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
763             return WINED3DERR_INVALIDCALL;
764     }
765
766     surface->flags |= SFLAG_INSYSMEM;
767
768     return WINED3D_OK;
769 }
770
771 static void surface_realize_palette(struct wined3d_surface *surface)
772 {
773     struct wined3d_palette *palette = surface->palette;
774
775     TRACE("surface %p.\n", surface);
776
777     if (!palette) return;
778
779     if (surface->resource.format->id == WINED3DFMT_P8_UINT
780             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
781     {
782         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
783         {
784             /* Make sure the texture is up to date. This call doesn't do
785              * anything if the texture is already up to date. */
786             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
787
788             /* We want to force a palette refresh, so mark the drawable as not being up to date */
789             if (!surface_is_offscreen(surface))
790                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
791         }
792         else
793         {
794             if (!(surface->flags & SFLAG_INSYSMEM))
795             {
796                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
797                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
798             }
799             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
800         }
801     }
802
803     if (surface->flags & SFLAG_DIBSECTION)
804     {
805         RGBQUAD col[256];
806         unsigned int i;
807
808         TRACE("Updating the DC's palette.\n");
809
810         for (i = 0; i < 256; ++i)
811         {
812             col[i].rgbRed   = palette->palents[i].peRed;
813             col[i].rgbGreen = palette->palents[i].peGreen;
814             col[i].rgbBlue  = palette->palents[i].peBlue;
815             col[i].rgbReserved = 0;
816         }
817         SetDIBColorTable(surface->hDC, 0, 256, col);
818     }
819
820     /* Propagate the changes to the drawable when we have a palette. */
821     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
822         surface_load_location(surface, surface->draw_binding, NULL);
823 }
824
825 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
826 {
827     HRESULT hr;
828
829     /* If there's no destination surface there is nothing to do. */
830     if (!surface->overlay_dest)
831         return WINED3D_OK;
832
833     /* Blt calls ModifyLocation on the dest surface, which in turn calls
834      * DrawOverlay to update the overlay. Prevent an endless recursion. */
835     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
836         return WINED3D_OK;
837
838     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
839     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
840             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
841     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
842
843     return hr;
844 }
845
846 static void surface_preload(struct wined3d_surface *surface)
847 {
848     TRACE("surface %p.\n", surface);
849
850     surface_internal_preload(surface, SRGB_ANY);
851 }
852
853 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
854 {
855     struct wined3d_device *device = surface->resource.device;
856     const RECT *pass_rect = rect;
857
858     TRACE("surface %p, rect %s, flags %#x.\n",
859             surface, wine_dbgstr_rect(rect), flags);
860
861     if (flags & WINED3DLOCK_DISCARD)
862     {
863         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
864         surface_prepare_system_memory(surface);
865         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
866     }
867     else
868     {
869         /* surface_load_location() does not check if the rectangle specifies
870          * the full surface. Most callers don't need that, so do it here. */
871         if (rect && !rect->top && !rect->left
872                 && rect->right == surface->resource.width
873                 && rect->bottom == surface->resource.height)
874             pass_rect = NULL;
875
876         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
877                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
878                 || surface == device->fb.render_targets[0])))
879             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
880     }
881
882     if (surface->flags & SFLAG_PBO)
883     {
884         const struct wined3d_gl_info *gl_info;
885         struct wined3d_context *context;
886
887         context = context_acquire(device, NULL);
888         gl_info = context->gl_info;
889
890         ENTER_GL();
891         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
892         checkGLcall("glBindBufferARB");
893
894         /* This shouldn't happen but could occur if some other function
895          * didn't handle the PBO properly. */
896         if (surface->resource.allocatedMemory)
897             ERR("The surface already has PBO memory allocated.\n");
898
899         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
900         checkGLcall("glMapBufferARB");
901
902         /* Make sure the PBO isn't set anymore in order not to break non-PBO
903          * calls. */
904         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
905         checkGLcall("glBindBufferARB");
906
907         LEAVE_GL();
908         context_release(context);
909     }
910
911     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
912     {
913         if (!rect)
914             surface_add_dirty_rect(surface, NULL);
915         else
916         {
917             WINED3DBOX b;
918
919             b.Left = rect->left;
920             b.Top = rect->top;
921             b.Right = rect->right;
922             b.Bottom = rect->bottom;
923             b.Front = 0;
924             b.Back = 1;
925             surface_add_dirty_rect(surface, &b);
926         }
927     }
928 }
929
930 static void surface_unmap(struct wined3d_surface *surface)
931 {
932     struct wined3d_device *device = surface->resource.device;
933     BOOL fullsurface;
934
935     TRACE("surface %p.\n", surface);
936
937     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
938
939     if (surface->flags & SFLAG_PBO)
940     {
941         const struct wined3d_gl_info *gl_info;
942         struct wined3d_context *context;
943
944         TRACE("Freeing PBO memory.\n");
945
946         context = context_acquire(device, NULL);
947         gl_info = context->gl_info;
948
949         ENTER_GL();
950         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
951         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
952         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
953         checkGLcall("glUnmapBufferARB");
954         LEAVE_GL();
955         context_release(context);
956
957         surface->resource.allocatedMemory = NULL;
958     }
959
960     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
961
962     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
963     {
964         TRACE("Not dirtified, nothing to do.\n");
965         goto done;
966     }
967
968     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
969             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
970     {
971         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
972         {
973             static BOOL warned = FALSE;
974             if (!warned)
975             {
976                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
977                 warned = TRUE;
978             }
979             goto done;
980         }
981
982         if (!surface->dirtyRect.left && !surface->dirtyRect.top
983                 && surface->dirtyRect.right == surface->resource.width
984                 && surface->dirtyRect.bottom == surface->resource.height)
985         {
986             fullsurface = TRUE;
987         }
988         else
989         {
990             /* TODO: Proper partial rectangle tracking. */
991             fullsurface = FALSE;
992             surface->flags |= SFLAG_INSYSMEM;
993         }
994
995         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
996
997         /* Partial rectangle tracking is not commonly implemented, it is only
998          * done for render targets. INSYSMEM was set before to tell
999          * surface_load_location() where to read the rectangle from.
1000          * Indrawable is set because all modifications from the partial
1001          * sysmem copy are written back to the drawable, thus the surface is
1002          * merged again in the drawable. The sysmem copy is not fully up to
1003          * date because only a subrectangle was read in Map(). */
1004         if (!fullsurface)
1005         {
1006             surface_modify_location(surface, surface->draw_binding, TRUE);
1007             surface_evict_sysmem(surface);
1008         }
1009
1010         surface->dirtyRect.left = surface->resource.width;
1011         surface->dirtyRect.top = surface->resource.height;
1012         surface->dirtyRect.right = 0;
1013         surface->dirtyRect.bottom = 0;
1014     }
1015     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1016     {
1017         FIXME("Depth / stencil buffer locking is not implemented.\n");
1018     }
1019
1020 done:
1021     /* Overlays have to be redrawn manually after changes with the GL implementation */
1022     if (surface->overlay_dest)
1023         surface->surface_ops->surface_draw_overlay(surface);
1024 }
1025
1026 static HRESULT surface_getdc(struct wined3d_surface *surface)
1027 {
1028     WINED3DLOCKED_RECT lock;
1029     HRESULT hr;
1030
1031     TRACE("surface %p.\n", surface);
1032
1033     /* Create a DIB section if there isn't a dc yet. */
1034     if (!surface->hDC)
1035     {
1036         if (surface->flags & SFLAG_CLIENT)
1037         {
1038             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1039             surface_release_client_storage(surface);
1040         }
1041         hr = surface_create_dib_section(surface);
1042         if (FAILED(hr))
1043             return WINED3DERR_INVALIDCALL;
1044
1045         /* Use the DIB section from now on if we are not using a PBO. */
1046         if (!(surface->flags & SFLAG_PBO))
1047             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1048     }
1049
1050     /* Map the surface. */
1051     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1052     if (FAILED(hr))
1053         ERR("Map failed, hr %#x.\n", hr);
1054
1055     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1056      * activates the allocatedMemory. */
1057     if (surface->flags & SFLAG_PBO)
1058         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
1059
1060     return hr;
1061 }
1062
1063 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1064 {
1065     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1066         return FALSE;
1067     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1068         return FALSE;
1069     return TRUE;
1070 }
1071
1072 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1073         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1074 {
1075     const struct wined3d_gl_info *gl_info;
1076     struct wined3d_context *context;
1077     DWORD src_mask, dst_mask;
1078     GLbitfield gl_mask;
1079
1080     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1081             device, src_surface, wine_dbgstr_rect(src_rect),
1082             dst_surface, wine_dbgstr_rect(dst_rect));
1083
1084     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1085     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1086
1087     if (src_mask != dst_mask)
1088     {
1089         ERR("Incompatible formats %s and %s.\n",
1090                 debug_d3dformat(src_surface->resource.format->id),
1091                 debug_d3dformat(dst_surface->resource.format->id));
1092         return;
1093     }
1094
1095     if (!src_mask)
1096     {
1097         ERR("Not a depth / stencil format: %s.\n",
1098                 debug_d3dformat(src_surface->resource.format->id));
1099         return;
1100     }
1101
1102     gl_mask = 0;
1103     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1104         gl_mask |= GL_DEPTH_BUFFER_BIT;
1105     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1106         gl_mask |= GL_STENCIL_BUFFER_BIT;
1107
1108     /* Make sure the locations are up-to-date. Loading the destination
1109      * surface isn't required if the entire surface is overwritten. */
1110     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1111     if (!surface_is_full_rect(dst_surface, dst_rect))
1112         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1113
1114     context = context_acquire(device, NULL);
1115     if (!context->valid)
1116     {
1117         context_release(context);
1118         WARN("Invalid context, skipping blit.\n");
1119         return;
1120     }
1121
1122     gl_info = context->gl_info;
1123
1124     ENTER_GL();
1125
1126     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1127     glReadBuffer(GL_NONE);
1128     checkGLcall("glReadBuffer()");
1129     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1130
1131     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1132     context_set_draw_buffer(context, GL_NONE);
1133     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1134
1135     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1136     {
1137         glDepthMask(GL_TRUE);
1138         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1139     }
1140     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1141     {
1142         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1143         {
1144             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1145             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1146         }
1147         glStencilMask(~0U);
1148         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1149     }
1150
1151     glDisable(GL_SCISSOR_TEST);
1152     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1153
1154     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1155             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1156     checkGLcall("glBlitFramebuffer()");
1157
1158     LEAVE_GL();
1159
1160     if (wined3d_settings.strict_draw_ordering)
1161         wglFlush(); /* Flush to ensure ordering across contexts. */
1162
1163     context_release(context);
1164 }
1165
1166 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1167  * Depth / stencil is not supported. */
1168 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1169         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1170         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1171 {
1172     const struct wined3d_gl_info *gl_info;
1173     struct wined3d_context *context;
1174     RECT src_rect, dst_rect;
1175     GLenum gl_filter;
1176     GLenum buffer;
1177
1178     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1179     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1180             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1181     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1182             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1183
1184     src_rect = *src_rect_in;
1185     dst_rect = *dst_rect_in;
1186
1187     switch (filter)
1188     {
1189         case WINED3DTEXF_LINEAR:
1190             gl_filter = GL_LINEAR;
1191             break;
1192
1193         default:
1194             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1195         case WINED3DTEXF_NONE:
1196         case WINED3DTEXF_POINT:
1197             gl_filter = GL_NEAREST;
1198             break;
1199     }
1200
1201     /* Resolve the source surface first if needed. */
1202     if (src_location == SFLAG_INRB_MULTISAMPLE
1203             && (src_surface->resource.format->id != dst_surface->resource.format->id
1204                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1205                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1206         src_location = SFLAG_INRB_RESOLVED;
1207
1208     /* Make sure the locations are up-to-date. Loading the destination
1209      * surface isn't required if the entire surface is overwritten. (And is
1210      * in fact harmful if we're being called by surface_load_location() with
1211      * the purpose of loading the destination surface.) */
1212     surface_load_location(src_surface, src_location, NULL);
1213     if (!surface_is_full_rect(dst_surface, &dst_rect))
1214         surface_load_location(dst_surface, dst_location, NULL);
1215
1216     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1217     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1218     else context = context_acquire(device, NULL);
1219
1220     if (!context->valid)
1221     {
1222         context_release(context);
1223         WARN("Invalid context, skipping blit.\n");
1224         return;
1225     }
1226
1227     gl_info = context->gl_info;
1228
1229     if (src_location == SFLAG_INDRAWABLE)
1230     {
1231         TRACE("Source surface %p is onscreen.\n", src_surface);
1232         buffer = surface_get_gl_buffer(src_surface);
1233         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1234     }
1235     else
1236     {
1237         TRACE("Source surface %p is offscreen.\n", src_surface);
1238         buffer = GL_COLOR_ATTACHMENT0;
1239     }
1240
1241     ENTER_GL();
1242     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1243     glReadBuffer(buffer);
1244     checkGLcall("glReadBuffer()");
1245     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1246     LEAVE_GL();
1247
1248     if (dst_location == SFLAG_INDRAWABLE)
1249     {
1250         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1251         buffer = surface_get_gl_buffer(dst_surface);
1252         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1253     }
1254     else
1255     {
1256         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1257         buffer = GL_COLOR_ATTACHMENT0;
1258     }
1259
1260     ENTER_GL();
1261     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1262     context_set_draw_buffer(context, buffer);
1263     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1264     context_invalidate_state(context, STATE_FRAMEBUFFER);
1265
1266     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1267     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1268     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1269     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1270     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1271
1272     glDisable(GL_SCISSOR_TEST);
1273     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1274
1275     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1276             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1277     checkGLcall("glBlitFramebuffer()");
1278
1279     LEAVE_GL();
1280
1281     if (wined3d_settings.strict_draw_ordering
1282             || (dst_location == SFLAG_INDRAWABLE
1283             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1284         wglFlush();
1285
1286     context_release(context);
1287 }
1288
1289 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1290         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1291         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1292 {
1293     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1294         return FALSE;
1295
1296     /* Source and/or destination need to be on the GL side */
1297     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1298         return FALSE;
1299
1300     switch (blit_op)
1301     {
1302         case WINED3D_BLIT_OP_COLOR_BLIT:
1303             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1304                 return FALSE;
1305             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1306                 return FALSE;
1307             break;
1308
1309         case WINED3D_BLIT_OP_DEPTH_BLIT:
1310             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1311                 return FALSE;
1312             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1313                 return FALSE;
1314             break;
1315
1316         default:
1317             return FALSE;
1318     }
1319
1320     if (!(src_format->id == dst_format->id
1321             || (is_identity_fixup(src_format->color_fixup)
1322             && is_identity_fixup(dst_format->color_fixup))))
1323         return FALSE;
1324
1325     return TRUE;
1326 }
1327
1328 /* This function checks if the primary render target uses the 8bit paletted format. */
1329 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1330 {
1331     if (device->fb.render_targets && device->fb.render_targets[0])
1332     {
1333         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1334         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1335                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1336             return TRUE;
1337     }
1338     return FALSE;
1339 }
1340
1341 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1342         DWORD color, WINED3DCOLORVALUE *float_color)
1343 {
1344     const struct wined3d_format *format = surface->resource.format;
1345     const struct wined3d_device *device = surface->resource.device;
1346
1347     switch (format->id)
1348     {
1349         case WINED3DFMT_P8_UINT:
1350             if (surface->palette)
1351             {
1352                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1353                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1354                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1355             }
1356             else
1357             {
1358                 float_color->r = 0.0f;
1359                 float_color->g = 0.0f;
1360                 float_color->b = 0.0f;
1361             }
1362             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1363             break;
1364
1365         case WINED3DFMT_B5G6R5_UNORM:
1366             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1367             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1368             float_color->b = (color & 0x1f) / 31.0f;
1369             float_color->a = 1.0f;
1370             break;
1371
1372         case WINED3DFMT_B8G8R8_UNORM:
1373         case WINED3DFMT_B8G8R8X8_UNORM:
1374             float_color->r = D3DCOLOR_R(color);
1375             float_color->g = D3DCOLOR_G(color);
1376             float_color->b = D3DCOLOR_B(color);
1377             float_color->a = 1.0f;
1378             break;
1379
1380         case WINED3DFMT_B8G8R8A8_UNORM:
1381             float_color->r = D3DCOLOR_R(color);
1382             float_color->g = D3DCOLOR_G(color);
1383             float_color->b = D3DCOLOR_B(color);
1384             float_color->a = D3DCOLOR_A(color);
1385             break;
1386
1387         default:
1388             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1389             return FALSE;
1390     }
1391
1392     return TRUE;
1393 }
1394
1395 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1396 {
1397     const struct wined3d_format *format = surface->resource.format;
1398
1399     switch (format->id)
1400     {
1401         case WINED3DFMT_S1_UINT_D15_UNORM:
1402             *float_depth = depth / (float)0x00007fff;
1403             break;
1404
1405         case WINED3DFMT_D16_UNORM:
1406             *float_depth = depth / (float)0x0000ffff;
1407             break;
1408
1409         case WINED3DFMT_D24_UNORM_S8_UINT:
1410         case WINED3DFMT_X8D24_UNORM:
1411             *float_depth = depth / (float)0x00ffffff;
1412             break;
1413
1414         case WINED3DFMT_D32_UNORM:
1415             *float_depth = depth / (float)0xffffffff;
1416             break;
1417
1418         default:
1419             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1420             return FALSE;
1421     }
1422
1423     return TRUE;
1424 }
1425
1426 /* Do not call while under the GL lock. */
1427 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1428 {
1429     const struct wined3d_resource *resource = &surface->resource;
1430     struct wined3d_device *device = resource->device;
1431     const struct blit_shader *blitter;
1432
1433     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1434             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1435     if (!blitter)
1436     {
1437         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1438         return WINED3DERR_INVALIDCALL;
1439     }
1440
1441     return blitter->depth_fill(device, surface, rect, depth);
1442 }
1443
1444 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1445         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1446 {
1447     struct wined3d_device *device = src_surface->resource.device;
1448
1449     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1450             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1451             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1452         return WINED3DERR_INVALIDCALL;
1453
1454     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1455
1456     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1457             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1458     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1459
1460     return WINED3D_OK;
1461 }
1462
1463 /* Do not call while under the GL lock. */
1464 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1465         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1466         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1467 {
1468     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1469     struct wined3d_device *device = dst_surface->resource.device;
1470     DWORD src_ds_flags, dst_ds_flags;
1471     RECT src_rect, dst_rect;
1472
1473     static const DWORD simple_blit = WINEDDBLT_ASYNC
1474             | WINEDDBLT_COLORFILL
1475             | WINEDDBLT_WAIT
1476             | WINEDDBLT_DEPTHFILL
1477             | WINEDDBLT_DONOTWAIT;
1478
1479     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1480             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1481             flags, fx, debug_d3dtexturefiltertype(filter));
1482     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1483
1484     if (fx)
1485     {
1486         TRACE("dwSize %#x.\n", fx->dwSize);
1487         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1488         TRACE("dwROP %#x.\n", fx->dwROP);
1489         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1490         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1491         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1492         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1493         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1494         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1495         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1496         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1497         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1498         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1499         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1500         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1501         TRACE("dwReserved %#x.\n", fx->dwReserved);
1502         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1503         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1504         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1505         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1506         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1507         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1508                 fx->ddckDestColorkey.dwColorSpaceLowValue,
1509                 fx->ddckDestColorkey.dwColorSpaceHighValue);
1510         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1511                 fx->ddckSrcColorkey.dwColorSpaceLowValue,
1512                 fx->ddckSrcColorkey.dwColorSpaceHighValue);
1513     }
1514
1515     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1516     {
1517         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1518         return WINEDDERR_SURFACEBUSY;
1519     }
1520
1521     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1522
1523     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1524             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1525             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1526             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1527             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1528     {
1529         /* The destination rect can be out of bounds on the condition
1530          * that a clipper is set for the surface. */
1531         if (dst_surface->clipper)
1532             FIXME("Blit clipping not implemented.\n");
1533         else
1534             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1535         return WINEDDERR_INVALIDRECT;
1536     }
1537
1538     if (src_surface)
1539     {
1540         surface_get_rect(src_surface, src_rect_in, &src_rect);
1541
1542         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1543                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1544                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1545                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1546                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1547         {
1548             WARN("Application gave us bad source rectangle for Blt.\n");
1549             return WINEDDERR_INVALIDRECT;
1550         }
1551     }
1552     else
1553     {
1554         memset(&src_rect, 0, sizeof(src_rect));
1555     }
1556
1557     if (!fx || !(fx->dwDDFX))
1558         flags &= ~WINEDDBLT_DDFX;
1559
1560     if (flags & WINEDDBLT_WAIT)
1561         flags &= ~WINEDDBLT_WAIT;
1562
1563     if (flags & WINEDDBLT_ASYNC)
1564     {
1565         static unsigned int once;
1566
1567         if (!once++)
1568             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1569         flags &= ~WINEDDBLT_ASYNC;
1570     }
1571
1572     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1573     if (flags & WINEDDBLT_DONOTWAIT)
1574     {
1575         static unsigned int once;
1576
1577         if (!once++)
1578             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1579         flags &= ~WINEDDBLT_DONOTWAIT;
1580     }
1581
1582     if (!device->d3d_initialized)
1583     {
1584         WARN("D3D not initialized, using fallback.\n");
1585         goto cpu;
1586     }
1587
1588     /* We want to avoid invalidating the sysmem location for converted
1589      * surfaces, since otherwise we'd have to convert the data back when
1590      * locking them. */
1591     if (dst_surface->flags & SFLAG_CONVERTED)
1592     {
1593         WARN("Converted surface, using CPU blit.\n");
1594         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1595     }
1596
1597     if (flags & ~simple_blit)
1598     {
1599         WARN("Using fallback for complex blit (%#x).\n", flags);
1600         goto fallback;
1601     }
1602
1603     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1604         src_swapchain = src_surface->container.u.swapchain;
1605     else
1606         src_swapchain = NULL;
1607
1608     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1609         dst_swapchain = dst_surface->container.u.swapchain;
1610     else
1611         dst_swapchain = NULL;
1612
1613     /* This isn't strictly needed. FBO blits for example could deal with
1614      * cross-swapchain blits by first downloading the source to a texture
1615      * before switching to the destination context. We just have this here to
1616      * not have to deal with the issue, since cross-swapchain blits should be
1617      * rare. */
1618     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1619     {
1620         FIXME("Using fallback for cross-swapchain blit.\n");
1621         goto fallback;
1622     }
1623
1624     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1625     if (src_surface)
1626         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1627     else
1628         src_ds_flags = 0;
1629
1630     if (src_ds_flags || dst_ds_flags)
1631     {
1632         if (flags & WINEDDBLT_DEPTHFILL)
1633         {
1634             float depth;
1635
1636             TRACE("Depth fill.\n");
1637
1638             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1639                 return WINED3DERR_INVALIDCALL;
1640
1641             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1642                 return WINED3D_OK;
1643         }
1644         else
1645         {
1646             /* Accessing depth / stencil surfaces is supposed to fail while in
1647              * a scene, except for fills, which seem to work. */
1648             if (device->inScene)
1649             {
1650                 WARN("Rejecting depth / stencil access while in scene.\n");
1651                 return WINED3DERR_INVALIDCALL;
1652             }
1653
1654             if (src_ds_flags != dst_ds_flags)
1655             {
1656                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1657                 return WINED3DERR_INVALIDCALL;
1658             }
1659
1660             if (src_rect.top || src_rect.left
1661                     || src_rect.bottom != src_surface->resource.height
1662                     || src_rect.right != src_surface->resource.width)
1663             {
1664                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1665                         wine_dbgstr_rect(&src_rect));
1666                 return WINED3DERR_INVALIDCALL;
1667             }
1668
1669             if (dst_rect.top || dst_rect.left
1670                     || dst_rect.bottom != dst_surface->resource.height
1671                     || dst_rect.right != dst_surface->resource.width)
1672             {
1673                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1674                         wine_dbgstr_rect(&src_rect));
1675                 return WINED3DERR_INVALIDCALL;
1676             }
1677
1678             if (src_surface->resource.height != dst_surface->resource.height
1679                     || src_surface->resource.width != dst_surface->resource.width)
1680             {
1681                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1682                 return WINED3DERR_INVALIDCALL;
1683             }
1684
1685             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1686                 return WINED3D_OK;
1687         }
1688     }
1689     else
1690     {
1691         if (flags & WINEDDBLT_COLORFILL)
1692         {
1693             WINED3DCOLORVALUE color;
1694
1695             TRACE("Color fill.\n");
1696
1697             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1698                 goto fallback;
1699
1700             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1701                 return WINED3D_OK;
1702         }
1703         else
1704         {
1705             TRACE("Color blit.\n");
1706
1707             /* Use present for back -> front blits. The idea behind this is
1708              * that present is potentially faster than a blit, in particular
1709              * when FBO blits aren't available. Some ddraw applications like
1710              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1711              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1712              * applications can't blit directly to the frontbuffer. */
1713             if (dst_swapchain && dst_swapchain->back_buffers
1714                     && dst_surface == dst_swapchain->front_buffer
1715                     && src_surface == dst_swapchain->back_buffers[0])
1716             {
1717                 WINED3DSWAPEFFECT swap_effect = dst_swapchain->presentParms.SwapEffect;
1718
1719                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1720
1721                 /* Set the swap effect to COPY, we don't want the backbuffer
1722                  * to become undefined. */
1723                 dst_swapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
1724                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1725                 dst_swapchain->presentParms.SwapEffect = swap_effect;
1726
1727                 return WINED3D_OK;
1728             }
1729
1730             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1731                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1732                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1733             {
1734                 TRACE("Using FBO blit.\n");
1735
1736                 surface_blt_fbo(device, filter,
1737                         src_surface, src_surface->draw_binding, &src_rect,
1738                         dst_surface, dst_surface->draw_binding, &dst_rect);
1739                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1740                 return WINED3D_OK;
1741             }
1742
1743             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1744                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1745                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1746             {
1747                 TRACE("Using arbfp blit.\n");
1748
1749                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1750                     return WINED3D_OK;
1751             }
1752         }
1753     }
1754
1755 fallback:
1756
1757     /* Special cases for render targets. */
1758     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1759             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1760     {
1761         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1762                 src_surface, &src_rect, flags, fx, filter)))
1763             return WINED3D_OK;
1764     }
1765
1766 cpu:
1767
1768     /* For the rest call the X11 surface implementation. For render targets
1769      * this should be implemented OpenGL accelerated in BltOverride, other
1770      * blits are rather rare. */
1771     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1772 }
1773
1774 /* Do not call while under the GL lock. */
1775 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1776         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1777 {
1778     RECT src_rect, dst_rect;
1779     DWORD flags = 0;
1780
1781     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect_in %s, trans %#x.\n",
1782             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1783
1784     surface_get_rect(src_surface, src_rect_in, &src_rect);
1785
1786     dst_rect.left = dst_x;
1787     dst_rect.top = dst_y;
1788     dst_rect.right = dst_x + src_rect.right - src_rect.left;
1789     dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1790
1791     if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1792         flags |= WINEDDBLT_KEYSRC;
1793     if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1794         flags |= WINEDDBLT_KEYDEST;
1795     if (trans & WINEDDBLTFAST_WAIT)
1796         flags |= WINEDDBLT_WAIT;
1797     if (trans & WINEDDBLTFAST_DONOTWAIT)
1798         flags |= WINEDDBLT_DONOTWAIT;
1799
1800     return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT);
1801 }
1802
1803 /* Context activation is done by the caller. */
1804 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1805 {
1806     if (!surface->resource.heapMemory)
1807     {
1808         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1809         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1810                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1811     }
1812
1813     ENTER_GL();
1814     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1815     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1816     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1817             surface->resource.size, surface->resource.allocatedMemory));
1818     checkGLcall("glGetBufferSubDataARB");
1819     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1820     checkGLcall("glDeleteBuffersARB");
1821     LEAVE_GL();
1822
1823     surface->pbo = 0;
1824     surface->flags &= ~SFLAG_PBO;
1825 }
1826
1827 /* Do not call while under the GL lock. */
1828 static void surface_unload(struct wined3d_resource *resource)
1829 {
1830     struct wined3d_surface *surface = surface_from_resource(resource);
1831     struct wined3d_renderbuffer_entry *entry, *entry2;
1832     struct wined3d_device *device = resource->device;
1833     const struct wined3d_gl_info *gl_info;
1834     struct wined3d_context *context;
1835
1836     TRACE("surface %p.\n", surface);
1837
1838     if (resource->pool == WINED3DPOOL_DEFAULT)
1839     {
1840         /* Default pool resources are supposed to be destroyed before Reset is called.
1841          * Implicit resources stay however. So this means we have an implicit render target
1842          * or depth stencil. The content may be destroyed, but we still have to tear down
1843          * opengl resources, so we cannot leave early.
1844          *
1845          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1846          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1847          * or the depth stencil into an FBO the texture or render buffer will be removed
1848          * and all flags get lost
1849          */
1850         surface_init_sysmem(surface);
1851     }
1852     else
1853     {
1854         /* Load the surface into system memory */
1855         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1856         surface_modify_location(surface, surface->draw_binding, FALSE);
1857     }
1858     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1859     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1860     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1861
1862     context = context_acquire(device, NULL);
1863     gl_info = context->gl_info;
1864
1865     /* Destroy PBOs, but load them into real sysmem before */
1866     if (surface->flags & SFLAG_PBO)
1867         surface_remove_pbo(surface, gl_info);
1868
1869     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1870      * all application-created targets the application has to release the surface
1871      * before calling _Reset
1872      */
1873     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1874     {
1875         ENTER_GL();
1876         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1877         LEAVE_GL();
1878         list_remove(&entry->entry);
1879         HeapFree(GetProcessHeap(), 0, entry);
1880     }
1881     list_init(&surface->renderbuffers);
1882     surface->current_renderbuffer = NULL;
1883
1884     ENTER_GL();
1885
1886     /* If we're in a texture, the texture name belongs to the texture.
1887      * Otherwise, destroy it. */
1888     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1889     {
1890         glDeleteTextures(1, &surface->texture_name);
1891         surface->texture_name = 0;
1892         glDeleteTextures(1, &surface->texture_name_srgb);
1893         surface->texture_name_srgb = 0;
1894     }
1895     if (surface->rb_multisample)
1896     {
1897         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1898         surface->rb_multisample = 0;
1899     }
1900     if (surface->rb_resolved)
1901     {
1902         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1903         surface->rb_resolved = 0;
1904     }
1905
1906     LEAVE_GL();
1907
1908     context_release(context);
1909
1910     resource_unload(resource);
1911 }
1912
1913 static const struct wined3d_resource_ops surface_resource_ops =
1914 {
1915     surface_unload,
1916 };
1917
1918 static const struct wined3d_surface_ops surface_ops =
1919 {
1920     surface_private_setup,
1921     surface_cleanup,
1922     surface_realize_palette,
1923     surface_draw_overlay,
1924     surface_preload,
1925     surface_map,
1926     surface_unmap,
1927     surface_getdc,
1928 };
1929
1930 /*****************************************************************************
1931  * Initializes the GDI surface, aka creates the DIB section we render to
1932  * The DIB section creation is done by calling GetDC, which will create the
1933  * section and releasing the dc to allow the app to use it. The dib section
1934  * will stay until the surface is released
1935  *
1936  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1937  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1938  * avoid confusion in the shared surface code.
1939  *
1940  * Returns:
1941  *  WINED3D_OK on success
1942  *  The return values of called methods on failure
1943  *
1944  *****************************************************************************/
1945 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1946 {
1947     HRESULT hr;
1948
1949     TRACE("surface %p.\n", surface);
1950
1951     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1952     {
1953         ERR("Overlays not yet supported by GDI surfaces.\n");
1954         return WINED3DERR_INVALIDCALL;
1955     }
1956
1957     /* Sysmem textures have memory already allocated - release it,
1958      * this avoids an unnecessary memcpy. */
1959     hr = surface_create_dib_section(surface);
1960     if (SUCCEEDED(hr))
1961     {
1962         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1963         surface->resource.heapMemory = NULL;
1964         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1965     }
1966
1967     /* We don't mind the nonpow2 stuff in GDI. */
1968     surface->pow2Width = surface->resource.width;
1969     surface->pow2Height = surface->resource.height;
1970
1971     return WINED3D_OK;
1972 }
1973
1974 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1975 {
1976     TRACE("surface %p.\n", surface);
1977
1978     if (surface->flags & SFLAG_DIBSECTION)
1979     {
1980         /* Release the DC. */
1981         SelectObject(surface->hDC, surface->dib.holdbitmap);
1982         DeleteDC(surface->hDC);
1983         /* Release the DIB section. */
1984         DeleteObject(surface->dib.DIBsection);
1985         surface->dib.bitmap_data = NULL;
1986         surface->resource.allocatedMemory = NULL;
1987     }
1988
1989     if (surface->flags & SFLAG_USERPTR)
1990         wined3d_surface_set_mem(surface, NULL);
1991     if (surface->overlay_dest)
1992         list_remove(&surface->overlay_entry);
1993
1994     HeapFree(GetProcessHeap(), 0, surface->palette9);
1995
1996     resource_cleanup(&surface->resource);
1997 }
1998
1999 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
2000 {
2001     struct wined3d_palette *palette = surface->palette;
2002
2003     TRACE("surface %p.\n", surface);
2004
2005     if (!palette) return;
2006
2007     if (surface->flags & SFLAG_DIBSECTION)
2008     {
2009         RGBQUAD col[256];
2010         unsigned int i;
2011
2012         TRACE("Updating the DC's palette.\n");
2013
2014         for (i = 0; i < 256; ++i)
2015         {
2016             col[i].rgbRed = palette->palents[i].peRed;
2017             col[i].rgbGreen = palette->palents[i].peGreen;
2018             col[i].rgbBlue = palette->palents[i].peBlue;
2019             col[i].rgbReserved = 0;
2020         }
2021         SetDIBColorTable(surface->hDC, 0, 256, col);
2022     }
2023
2024     /* Update the image because of the palette change. Some games like e.g.
2025      * Red Alert call SetEntries a lot to implement fading. */
2026     /* Tell the swapchain to update the screen. */
2027     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2028     {
2029         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2030         if (surface == swapchain->front_buffer)
2031         {
2032             x11_copy_to_screen(swapchain, NULL);
2033         }
2034     }
2035 }
2036
2037 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
2038 {
2039     FIXME("GDI surfaces can't draw overlays yet.\n");
2040     return E_FAIL;
2041 }
2042
/* Preload entry for GDI surfaces. Preloading is not supported, so this only
 * traces the call and reports an error. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);
    ERR("Preloading GDI surfaces is not supported.\n");
}
2049
2050 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2051 {
2052     TRACE("surface %p, rect %s, flags %#x.\n",
2053             surface, wine_dbgstr_rect(rect), flags);
2054
2055     if (!surface->resource.allocatedMemory)
2056     {
2057         /* This happens on gdi surfaces if the application set a user pointer
2058          * and resets it. Recreate the DIB section. */
2059         surface_create_dib_section(surface);
2060         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2061     }
2062 }
2063
2064 static void gdi_surface_unmap(struct wined3d_surface *surface)
2065 {
2066     TRACE("surface %p.\n", surface);
2067
2068     /* Tell the swapchain to update the screen. */
2069     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2070     {
2071         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2072         if (surface == swapchain->front_buffer)
2073         {
2074             x11_copy_to_screen(swapchain, &surface->lockedRect);
2075         }
2076     }
2077
2078     memset(&surface->lockedRect, 0, sizeof(RECT));
2079 }
2080
2081 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
2082 {
2083     WINED3DLOCKED_RECT lock;
2084     HRESULT hr;
2085
2086     TRACE("surface %p.\n", surface);
2087
2088     /* Should have a DIB section already. */
2089     if (!(surface->flags & SFLAG_DIBSECTION))
2090     {
2091         WARN("DC not supported on this surface\n");
2092         return WINED3DERR_INVALIDCALL;
2093     }
2094
2095     /* Map the surface. */
2096     hr = wined3d_surface_map(surface, &lock, NULL, 0);
2097     if (FAILED(hr))
2098         ERR("Map failed, hr %#x.\n", hr);
2099
2100     return hr;
2101 }
2102
2103 static const struct wined3d_surface_ops gdi_surface_ops =
2104 {
2105     gdi_surface_private_setup,
2106     surface_gdi_cleanup,
2107     gdi_surface_realize_palette,
2108     gdi_surface_draw_overlay,
2109     gdi_surface_preload,
2110     gdi_surface_map,
2111     gdi_surface_unmap,
2112     gdi_surface_getdc,
2113 };
2114
2115 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2116 {
2117     GLuint *name;
2118     DWORD flag;
2119
2120     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2121
2122     if(srgb)
2123     {
2124         name = &surface->texture_name_srgb;
2125         flag = SFLAG_INSRGBTEX;
2126     }
2127     else
2128     {
2129         name = &surface->texture_name;
2130         flag = SFLAG_INTEXTURE;
2131     }
2132
2133     if (!*name && new_name)
2134     {
2135         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2136          * surface has no texture name yet. See if we can get rid of this. */
2137         if (surface->flags & flag)
2138             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2139         surface_modify_location(surface, flag, FALSE);
2140     }
2141
2142     *name = new_name;
2143     surface_force_reload(surface);
2144 }
2145
2146 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2147 {
2148     TRACE("surface %p, target %#x.\n", surface, target);
2149
2150     if (surface->texture_target != target)
2151     {
2152         if (target == GL_TEXTURE_RECTANGLE_ARB)
2153         {
2154             surface->flags &= ~SFLAG_NORMCOORD;
2155         }
2156         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2157         {
2158             surface->flags |= SFLAG_NORMCOORD;
2159         }
2160     }
2161     surface->texture_target = target;
2162     surface_force_reload(surface);
2163 }
2164
2165 /* Context activation is done by the caller. */
2166 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2167 {
2168     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2169
2170     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2171     {
2172         struct wined3d_texture *texture = surface->container.u.texture;
2173
2174         TRACE("Passing to container (%p).\n", texture);
2175         texture->texture_ops->texture_bind(texture, context, srgb);
2176     }
2177     else
2178     {
2179         if (surface->texture_level)
2180         {
2181             ERR("Standalone surface %p is non-zero texture level %u.\n",
2182                     surface, surface->texture_level);
2183         }
2184
2185         if (srgb)
2186             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2187
2188         ENTER_GL();
2189
2190         if (!surface->texture_name)
2191         {
2192             glGenTextures(1, &surface->texture_name);
2193             checkGLcall("glGenTextures");
2194
2195             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2196
2197             context_bind_texture(context, surface->texture_target, surface->texture_name);
2198             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2199             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2200             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2201             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2202             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2203             checkGLcall("glTexParameteri");
2204         }
2205         else
2206         {
2207             context_bind_texture(context, surface->texture_target, surface->texture_name);
2208         }
2209
2210         LEAVE_GL();
2211     }
2212 }
2213
2214 /* This call just downloads data, the caller is responsible for binding the
2215  * correct texture. */
2216 /* Context activation is done by the caller. */
2217 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2218 {
2219     const struct wined3d_format *format = surface->resource.format;
2220
2221     /* Only support read back of converted P8 surfaces. */
2222     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2223     {
2224         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2225         return;
2226     }
2227
2228     ENTER_GL();
2229
2230     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2231     {
2232         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2233                 surface, surface->texture_level, format->glFormat, format->glType,
2234                 surface->resource.allocatedMemory);
2235
2236         if (surface->flags & SFLAG_PBO)
2237         {
2238             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2239             checkGLcall("glBindBufferARB");
2240             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2241             checkGLcall("glGetCompressedTexImageARB");
2242             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2243             checkGLcall("glBindBufferARB");
2244         }
2245         else
2246         {
2247             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2248                     surface->texture_level, surface->resource.allocatedMemory));
2249             checkGLcall("glGetCompressedTexImageARB");
2250         }
2251
2252         LEAVE_GL();
2253     }
2254     else
2255     {
2256         void *mem;
2257         GLenum gl_format = format->glFormat;
2258         GLenum gl_type = format->glType;
2259         int src_pitch = 0;
2260         int dst_pitch = 0;
2261
2262         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2263         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2264         {
2265             gl_format = GL_ALPHA;
2266             gl_type = GL_UNSIGNED_BYTE;
2267         }
2268
2269         if (surface->flags & SFLAG_NONPOW2)
2270         {
2271             unsigned char alignment = surface->resource.device->surface_alignment;
2272             src_pitch = format->byte_count * surface->pow2Width;
2273             dst_pitch = wined3d_surface_get_pitch(surface);
2274             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2275             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2276         }
2277         else
2278         {
2279             mem = surface->resource.allocatedMemory;
2280         }
2281
2282         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2283                 surface, surface->texture_level, gl_format, gl_type, mem);
2284
2285         if (surface->flags & SFLAG_PBO)
2286         {
2287             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2288             checkGLcall("glBindBufferARB");
2289
2290             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2291             checkGLcall("glGetTexImage");
2292
2293             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2294             checkGLcall("glBindBufferARB");
2295         }
2296         else
2297         {
2298             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2299             checkGLcall("glGetTexImage");
2300         }
2301         LEAVE_GL();
2302
2303         if (surface->flags & SFLAG_NONPOW2)
2304         {
2305             const BYTE *src_data;
2306             BYTE *dst_data;
2307             UINT y;
2308             /*
2309              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2310              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2311              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2312              *
2313              * We're doing this...
2314              *
2315              * instead of boxing the texture :
2316              * |<-texture width ->|  -->pow2width|   /\
2317              * |111111111111111111|              |   |
2318              * |222 Texture 222222| boxed empty  | texture height
2319              * |3333 Data 33333333|              |   |
2320              * |444444444444444444|              |   \/
2321              * -----------------------------------   |
2322              * |     boxed  empty | boxed empty  | pow2height
2323              * |                  |              |   \/
2324              * -----------------------------------
2325              *
2326              *
2327              * we're repacking the data to the expected texture width
2328              *
2329              * |<-texture width ->|  -->pow2width|   /\
2330              * |111111111111111111222222222222222|   |
2331              * |222333333333333333333444444444444| texture height
2332              * |444444                           |   |
2333              * |                                 |   \/
2334              * |                                 |   |
2335              * |            empty                | pow2height
2336              * |                                 |   \/
2337              * -----------------------------------
2338              *
2339              * == is the same as
2340              *
2341              * |<-texture width ->|    /\
2342              * |111111111111111111|
2343              * |222222222222222222|texture height
2344              * |333333333333333333|
2345              * |444444444444444444|    \/
2346              * --------------------
2347              *
2348              * this also means that any references to allocatedMemory should work with the data as if were a
2349              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2350              *
2351              * internally the texture is still stored in a boxed format so any references to textureName will
2352              * get a boxed texture with width pow2width and not a texture of width resource.width.
2353              *
2354              * Performance should not be an issue, because applications normally do not lock the surfaces when
2355              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2356              * and doesn't have to be re-read. */
2357             src_data = mem;
2358             dst_data = surface->resource.allocatedMemory;
2359             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2360             for (y = 1; y < surface->resource.height; ++y)
2361             {
2362                 /* skip the first row */
2363                 src_data += src_pitch;
2364                 dst_data += dst_pitch;
2365                 memcpy(dst_data, src_data, dst_pitch);
2366             }
2367
2368             HeapFree(GetProcessHeap(), 0, mem);
2369         }
2370     }
2371
2372     /* Surface has now been downloaded */
2373     surface->flags |= SFLAG_INSYSMEM;
2374 }
2375
2376 /* This call just uploads data, the caller is responsible for binding the
2377  * correct texture. */
2378 /* Context activation is done by the caller. */
2379 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2380         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2381         BOOL srgb, const struct wined3d_bo_address *data)
2382 {
2383     UINT update_w = src_rect->right - src_rect->left;
2384     UINT update_h = src_rect->bottom - src_rect->top;
2385
2386     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2387             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2388             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2389
2390     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2391         update_h *= format->heightscale;
2392
2393     ENTER_GL();
2394
2395     if (data->buffer_object)
2396     {
2397         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2398         checkGLcall("glBindBufferARB");
2399     }
2400
2401     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2402     {
2403         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2404         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2405         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2406         const BYTE *addr = data->addr;
2407         GLenum internal;
2408
2409         addr += (src_rect->top / format->block_height) * src_pitch;
2410         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2411
2412         if (srgb)
2413             internal = format->glGammaInternal;
2414         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2415             internal = format->rtInternal;
2416         else
2417             internal = format->glInternal;
2418
2419         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2420                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2421                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2422
2423         if (row_length == src_pitch)
2424         {
2425             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2426                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2427         }
2428         else
2429         {
2430             UINT row, y;
2431
2432             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2433              * can't use the unpack row length like below. */
2434             for (row = 0, y = dst_point->y; row < row_count; ++row)
2435             {
2436                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2437                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2438                 y += format->block_height;
2439                 addr += src_pitch;
2440             }
2441         }
2442         checkGLcall("glCompressedTexSubImage2DARB");
2443     }
2444     else
2445     {
2446         const BYTE *addr = data->addr;
2447
2448         addr += src_rect->top * src_w * format->byte_count;
2449         addr += src_rect->left * format->byte_count;
2450
2451         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2452                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2453                 update_w, update_h, format->glFormat, format->glType, addr);
2454
2455         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2456         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2457                 update_w, update_h, format->glFormat, format->glType, addr);
2458         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2459         checkGLcall("glTexSubImage2D");
2460     }
2461
2462     if (data->buffer_object)
2463     {
2464         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2465         checkGLcall("glBindBufferARB");
2466     }
2467
2468     LEAVE_GL();
2469
2470     if (wined3d_settings.strict_draw_ordering)
2471         wglFlush();
2472
2473     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2474     {
2475         struct wined3d_device *device = surface->resource.device;
2476         unsigned int i;
2477
2478         for (i = 0; i < device->context_count; ++i)
2479         {
2480             context_surface_update(device->contexts[i], surface);
2481         }
2482     }
2483 }
2484
2485 /* This call just allocates the texture, the caller is responsible for binding
2486  * the correct texture. */
2487 /* Context activation is done by the caller. */
2488 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2489         const struct wined3d_format *format, BOOL srgb)
2490 {
2491     BOOL enable_client_storage = FALSE;
2492     GLsizei width = surface->pow2Width;
2493     GLsizei height = surface->pow2Height;
2494     const BYTE *mem = NULL;
2495     GLenum internal;
2496
2497     if (srgb)
2498     {
2499         internal = format->glGammaInternal;
2500     }
2501     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2502     {
2503         internal = format->rtInternal;
2504     }
2505     else
2506     {
2507         internal = format->glInternal;
2508     }
2509
2510     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2511
2512     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2513             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2514             internal, width, height, format->glFormat, format->glType);
2515
2516     ENTER_GL();
2517
2518     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2519     {
2520         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2521                 || !surface->resource.allocatedMemory)
2522         {
2523             /* In some cases we want to disable client storage.
2524              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2525              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2526              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2527              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2528              */
2529             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2530             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2531             surface->flags &= ~SFLAG_CLIENT;
2532             enable_client_storage = TRUE;
2533         }
2534         else
2535         {
2536             surface->flags |= SFLAG_CLIENT;
2537
2538             /* Point OpenGL to our allocated texture memory. Do not use
2539              * resource.allocatedMemory here because it might point into a
2540              * PBO. Instead use heapMemory, but get the alignment right. */
2541             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2542                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2543         }
2544     }
2545
2546     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2547     {
2548         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2549                 internal, width, height, 0, surface->resource.size, mem));
2550         checkGLcall("glCompressedTexImage2DARB");
2551     }
2552     else
2553     {
2554         glTexImage2D(surface->texture_target, surface->texture_level,
2555                 internal, width, height, 0, format->glFormat, format->glType, mem);
2556         checkGLcall("glTexImage2D");
2557     }
2558
2559     if(enable_client_storage) {
2560         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2561         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2562     }
2563     LEAVE_GL();
2564 }
2565
2566 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2567  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2568 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2569 /* GL locking is done by the caller */
2570 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2571 {
2572     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2573     struct wined3d_renderbuffer_entry *entry;
2574     GLuint renderbuffer = 0;
2575     unsigned int src_width, src_height;
2576     unsigned int width, height;
2577
2578     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2579     {
2580         width = rt->pow2Width;
2581         height = rt->pow2Height;
2582     }
2583     else
2584     {
2585         width = surface->pow2Width;
2586         height = surface->pow2Height;
2587     }
2588
2589     src_width = surface->pow2Width;
2590     src_height = surface->pow2Height;
2591
2592     /* A depth stencil smaller than the render target is not valid */
2593     if (width > src_width || height > src_height) return;
2594
2595     /* Remove any renderbuffer set if the sizes match */
2596     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2597             || (width == src_width && height == src_height))
2598     {
2599         surface->current_renderbuffer = NULL;
2600         return;
2601     }
2602
2603     /* Look if we've already got a renderbuffer of the correct dimensions */
2604     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2605     {
2606         if (entry->width == width && entry->height == height)
2607         {
2608             renderbuffer = entry->id;
2609             surface->current_renderbuffer = entry;
2610             break;
2611         }
2612     }
2613
2614     if (!renderbuffer)
2615     {
2616         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2617         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2618         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2619                 surface->resource.format->glInternal, width, height);
2620
2621         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2622         entry->width = width;
2623         entry->height = height;
2624         entry->id = renderbuffer;
2625         list_add_head(&surface->renderbuffers, &entry->entry);
2626
2627         surface->current_renderbuffer = entry;
2628     }
2629
2630     checkGLcall("set_compatible_renderbuffer");
2631 }
2632
2633 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2634 {
2635     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2636
2637     TRACE("surface %p.\n", surface);
2638
2639     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2640     {
2641         ERR("Surface %p is not on a swapchain.\n", surface);
2642         return GL_NONE;
2643     }
2644
2645     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2646     {
2647         if (swapchain->render_to_fbo)
2648         {
2649             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2650             return GL_COLOR_ATTACHMENT0;
2651         }
2652         TRACE("Returning GL_BACK\n");
2653         return GL_BACK;
2654     }
2655     else if (surface == swapchain->front_buffer)
2656     {
2657         TRACE("Returning GL_FRONT\n");
2658         return GL_FRONT;
2659     }
2660
2661     FIXME("Higher back buffer, returning GL_BACK\n");
2662     return GL_BACK;
2663 }
2664
2665 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2666 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2667 {
2668     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2669
2670     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2671         /* No partial locking for textures yet. */
2672         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2673
2674     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2675     if (dirty_rect)
2676     {
2677         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2678         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2679         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2680         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2681     }
2682     else
2683     {
2684         surface->dirtyRect.left = 0;
2685         surface->dirtyRect.top = 0;
2686         surface->dirtyRect.right = surface->resource.width;
2687         surface->dirtyRect.bottom = surface->resource.height;
2688     }
2689
2690     /* if the container is a texture then mark it dirty. */
2691     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2692     {
2693         TRACE("Passing to container.\n");
2694         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2695     }
2696 }
2697
2698 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2699 {
2700     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2701     BOOL ck_changed;
2702
2703     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2704
2705     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2706     {
2707         ERR("Not supported on scratch surfaces.\n");
2708         return WINED3DERR_INVALIDCALL;
2709     }
2710
2711     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2712
2713     /* Reload if either the texture and sysmem have different ideas about the
2714      * color key, or the actual key values changed. */
2715     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2716             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2717             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2718     {
2719         TRACE("Reloading because of color keying\n");
2720         /* To perform the color key conversion we need a sysmem copy of
2721          * the surface. Make sure we have it. */
2722
2723         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2724         /* Make sure the texture is reloaded because of the color key change,
2725          * this kills performance though :( */
2726         /* TODO: This is not necessarily needed with hw palettized texture support. */
2727         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2728         /* Switching color keying on / off may change the internal format. */
2729         if (ck_changed)
2730             surface_force_reload(surface);
2731     }
2732     else if (!(surface->flags & flag))
2733     {
2734         TRACE("Reloading because surface is dirty.\n");
2735     }
2736     else
2737     {
2738         TRACE("surface is already in texture\n");
2739         return WINED3D_OK;
2740     }
2741
2742     /* No partial locking for textures yet. */
2743     surface_load_location(surface, flag, NULL);
2744     surface_evict_sysmem(surface);
2745
2746     return WINED3D_OK;
2747 }
2748
/* Convert a 32 bit float to the bit pattern of a 16 bit half float: 1 sign
 * bit, 5 exponent bits (excess 15) and 10 mantissa bits. Values too large
 * for a half float become infinity, values too small become denormals or
 * zero. NaN is returned as 0x7c01.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The mantissa, including the implicit leading 1, is scaled into the
     * range [2^10, 2^11). */
    const float mantissa_low = 1024.0f;
    const float mantissa_high = 2048.0f;
    unsigned short sign;
    unsigned int mantissa;
    unsigned short ret;
    float tmp = fabsf(*in);
    int exp = 0;

    /* Deal with special numbers */
    if (isnan(*in))
        return 0x7c01;

    /* signbit() also catches negative zero, unlike a "< 0.0f" comparison. */
    sign = signbit(*in) ? 0x8000 : 0x0000;

    if (*in == 0.0f)
        return sign;
    if (isinf(*in))
        return sign | 0x7c00;

    /* Only one of these loops runs; tmp ends up in [2^10, 2^11). */
    while (tmp < mantissa_low)
    {
        tmp *= 2.0f;
        --exp;
    }
    while (tmp >= mantissa_high)
    {
        tmp /= 2.0f;
        ++exp;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 bit mantissa. Renormalize, or the
     * masking below would silently drop the carry. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= sign; /* Add the sign */
    return ret;
}
2811
2812 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2813 {
2814     ULONG refcount;
2815
2816     TRACE("Surface %p, container %p of type %#x.\n",
2817             surface, surface->container.u.base, surface->container.type);
2818
2819     switch (surface->container.type)
2820     {
2821         case WINED3D_CONTAINER_TEXTURE:
2822             return wined3d_texture_incref(surface->container.u.texture);
2823
2824         case WINED3D_CONTAINER_SWAPCHAIN:
2825             return wined3d_swapchain_incref(surface->container.u.swapchain);
2826
2827         default:
2828             ERR("Unhandled container type %#x.\n", surface->container.type);
2829         case WINED3D_CONTAINER_NONE:
2830             break;
2831     }
2832
2833     refcount = InterlockedIncrement(&surface->resource.ref);
2834     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2835
2836     return refcount;
2837 }
2838
2839 /* Do not call while under the GL lock. */
2840 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2841 {
2842     ULONG refcount;
2843
2844     TRACE("Surface %p, container %p of type %#x.\n",
2845             surface, surface->container.u.base, surface->container.type);
2846
2847     switch (surface->container.type)
2848     {
2849         case WINED3D_CONTAINER_TEXTURE:
2850             return wined3d_texture_decref(surface->container.u.texture);
2851
2852         case WINED3D_CONTAINER_SWAPCHAIN:
2853             return wined3d_swapchain_decref(surface->container.u.swapchain);
2854
2855         default:
2856             ERR("Unhandled container type %#x.\n", surface->container.type);
2857         case WINED3D_CONTAINER_NONE:
2858             break;
2859     }
2860
2861     refcount = InterlockedDecrement(&surface->resource.ref);
2862     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2863
2864     if (!refcount)
2865     {
2866         surface->surface_ops->surface_cleanup(surface);
2867         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2868
2869         TRACE("Destroyed surface %p.\n", surface);
2870         HeapFree(GetProcessHeap(), 0, surface);
2871     }
2872
2873     return refcount;
2874 }
2875
2876 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2877 {
2878     return resource_set_priority(&surface->resource, priority);
2879 }
2880
2881 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2882 {
2883     return resource_get_priority(&surface->resource);
2884 }
2885
2886 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2887 {
2888     TRACE("surface %p.\n", surface);
2889
2890     surface->surface_ops->surface_preload(surface);
2891 }
2892
2893 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2894 {
2895     TRACE("surface %p.\n", surface);
2896
2897     return surface->resource.parent;
2898 }
2899
2900 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2901 {
2902     TRACE("surface %p.\n", surface);
2903
2904     return &surface->resource;
2905 }
2906
2907 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2908 {
2909     TRACE("surface %p, flags %#x.\n", surface, flags);
2910
2911     switch (flags)
2912     {
2913         case WINEDDGBS_CANBLT:
2914         case WINEDDGBS_ISBLTDONE:
2915             return WINED3D_OK;
2916
2917         default:
2918             return WINED3DERR_INVALIDCALL;
2919     }
2920 }
2921
2922 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2923 {
2924     TRACE("surface %p, flags %#x.\n", surface, flags);
2925
2926     /* XXX: DDERR_INVALIDSURFACETYPE */
2927
2928     switch (flags)
2929     {
2930         case WINEDDGFS_CANFLIP:
2931         case WINEDDGFS_ISFLIPDONE:
2932             return WINED3D_OK;
2933
2934         default:
2935             return WINED3DERR_INVALIDCALL;
2936     }
2937 }
2938
2939 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2940 {
2941     TRACE("surface %p.\n", surface);
2942
2943     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2944     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2945 }
2946
2947 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2948 {
2949     TRACE("surface %p.\n", surface);
2950
2951     /* So far we don't lose anything :) */
2952     surface->flags &= ~SFLAG_LOST;
2953     return WINED3D_OK;
2954 }
2955
2956 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2957 {
2958     TRACE("surface %p, palette %p.\n", surface, palette);
2959
2960     if (surface->palette == palette)
2961     {
2962         TRACE("Nop palette change.\n");
2963         return WINED3D_OK;
2964     }
2965
2966     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2967         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2968
2969     surface->palette = palette;
2970
2971     if (palette)
2972     {
2973         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2974             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2975
2976         surface->surface_ops->surface_realize_palette(surface);
2977     }
2978
2979     return WINED3D_OK;
2980 }
2981
2982 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2983         DWORD flags, const WINEDDCOLORKEY *color_key)
2984 {
2985     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
2986
2987     if (flags & WINEDDCKEY_COLORSPACE)
2988     {
2989         FIXME(" colorkey value not supported (%08x) !\n", flags);
2990         return WINED3DERR_INVALIDCALL;
2991     }
2992
2993     /* Dirtify the surface, but only if a key was changed. */
2994     if (color_key)
2995     {
2996         switch (flags & ~WINEDDCKEY_COLORSPACE)
2997         {
2998             case WINEDDCKEY_DESTBLT:
2999                 surface->DestBltCKey = *color_key;
3000                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3001                 break;
3002
3003             case WINEDDCKEY_DESTOVERLAY:
3004                 surface->DestOverlayCKey = *color_key;
3005                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3006                 break;
3007
3008             case WINEDDCKEY_SRCOVERLAY:
3009                 surface->SrcOverlayCKey = *color_key;
3010                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3011                 break;
3012
3013             case WINEDDCKEY_SRCBLT:
3014                 surface->SrcBltCKey = *color_key;
3015                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3016                 break;
3017         }
3018     }
3019     else
3020     {
3021         switch (flags & ~WINEDDCKEY_COLORSPACE)
3022         {
3023             case WINEDDCKEY_DESTBLT:
3024                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3025                 break;
3026
3027             case WINEDDCKEY_DESTOVERLAY:
3028                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3029                 break;
3030
3031             case WINEDDCKEY_SRCOVERLAY:
3032                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3033                 break;
3034
3035             case WINEDDCKEY_SRCBLT:
3036                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3037                 break;
3038         }
3039     }
3040
3041     return WINED3D_OK;
3042 }
3043
3044 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3045 {
3046     TRACE("surface %p.\n", surface);
3047
3048     return surface->palette;
3049 }
3050
3051 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3052 {
3053     const struct wined3d_format *format = surface->resource.format;
3054     DWORD pitch;
3055
3056     TRACE("surface %p.\n", surface);
3057
3058     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3059     {
3060         /* Since compressed formats are block based, pitch means the amount of
3061          * bytes to the next row of block rather than the next row of pixels. */
3062         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3063         pitch = row_block_count * format->block_byte_count;
3064     }
3065     else
3066     {
3067         unsigned char alignment = surface->resource.device->surface_alignment;
3068         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3069         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3070     }
3071
3072     TRACE("Returning %u.\n", pitch);
3073
3074     return pitch;
3075 }
3076
3077 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3078 {
3079     TRACE("surface %p, mem %p.\n", surface, mem);
3080
3081     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3082     {
3083         WARN("Surface is locked or the DC is in use.\n");
3084         return WINED3DERR_INVALIDCALL;
3085     }
3086
3087     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3088     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3089     {
3090         ERR("Not supported on render targets.\n");
3091         return WINED3DERR_INVALIDCALL;
3092     }
3093
3094     if (mem && mem != surface->resource.allocatedMemory)
3095     {
3096         void *release = NULL;
3097
3098         /* Do I have to copy the old surface content? */
3099         if (surface->flags & SFLAG_DIBSECTION)
3100         {
3101             SelectObject(surface->hDC, surface->dib.holdbitmap);
3102             DeleteDC(surface->hDC);
3103             /* Release the DIB section. */
3104             DeleteObject(surface->dib.DIBsection);
3105             surface->dib.bitmap_data = NULL;
3106             surface->resource.allocatedMemory = NULL;
3107             surface->hDC = NULL;
3108             surface->flags &= ~SFLAG_DIBSECTION;
3109         }
3110         else if (!(surface->flags & SFLAG_USERPTR))
3111         {
3112             release = surface->resource.heapMemory;
3113             surface->resource.heapMemory = NULL;
3114         }
3115         surface->resource.allocatedMemory = mem;
3116         surface->flags |= SFLAG_USERPTR;
3117
3118         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3119         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3120
3121         /* For client textures OpenGL has to be notified. */
3122         if (surface->flags & SFLAG_CLIENT)
3123             surface_release_client_storage(surface);
3124
3125         /* Now free the old memory if any. */
3126         HeapFree(GetProcessHeap(), 0, release);
3127     }
3128     else if (surface->flags & SFLAG_USERPTR)
3129     {
3130         /* HeapMemory should be NULL already. */
3131         if (surface->resource.heapMemory)
3132             ERR("User pointer surface has heap memory allocated.\n");
3133
3134         if (!mem)
3135         {
3136             surface->resource.allocatedMemory = NULL;
3137             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3138
3139             if (surface->flags & SFLAG_CLIENT)
3140                 surface_release_client_storage(surface);
3141
3142             surface_prepare_system_memory(surface);
3143         }
3144
3145         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3146     }
3147
3148     return WINED3D_OK;
3149 }
3150
3151 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3152 {
3153     LONG w, h;
3154
3155     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3156
3157     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3158     {
3159         WARN("Not an overlay surface.\n");
3160         return WINEDDERR_NOTAOVERLAYSURFACE;
3161     }
3162
3163     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3164     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3165     surface->overlay_destrect.left = x;
3166     surface->overlay_destrect.top = y;
3167     surface->overlay_destrect.right = x + w;
3168     surface->overlay_destrect.bottom = y + h;
3169
3170     surface->surface_ops->surface_draw_overlay(surface);
3171
3172     return WINED3D_OK;
3173 }
3174
3175 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3176 {
3177     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3178
3179     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3180     {
3181         TRACE("Not an overlay surface.\n");
3182         return WINEDDERR_NOTAOVERLAYSURFACE;
3183     }
3184
3185     if (!surface->overlay_dest)
3186     {
3187         TRACE("Overlay not visible.\n");
3188         *x = 0;
3189         *y = 0;
3190         return WINEDDERR_OVERLAYNOTVISIBLE;
3191     }
3192
3193     *x = surface->overlay_destrect.left;
3194     *y = surface->overlay_destrect.top;
3195
3196     TRACE("Returning position %d, %d.\n", *x, *y);
3197
3198     return WINED3D_OK;
3199 }
3200
3201 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3202         DWORD flags, struct wined3d_surface *ref)
3203 {
3204     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3205
3206     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3207     {
3208         TRACE("Not an overlay surface.\n");
3209         return WINEDDERR_NOTAOVERLAYSURFACE;
3210     }
3211
3212     return WINED3D_OK;
3213 }
3214
3215 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3216         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3217 {
3218     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3219             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3220
3221     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3222     {
3223         WARN("Not an overlay surface.\n");
3224         return WINEDDERR_NOTAOVERLAYSURFACE;
3225     }
3226     else if (!dst_surface)
3227     {
3228         WARN("Dest surface is NULL.\n");
3229         return WINED3DERR_INVALIDCALL;
3230     }
3231
3232     if (src_rect)
3233     {
3234         surface->overlay_srcrect = *src_rect;
3235     }
3236     else
3237     {
3238         surface->overlay_srcrect.left = 0;
3239         surface->overlay_srcrect.top = 0;
3240         surface->overlay_srcrect.right = surface->resource.width;
3241         surface->overlay_srcrect.bottom = surface->resource.height;
3242     }
3243
3244     if (dst_rect)
3245     {
3246         surface->overlay_destrect = *dst_rect;
3247     }
3248     else
3249     {
3250         surface->overlay_destrect.left = 0;
3251         surface->overlay_destrect.top = 0;
3252         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3253         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3254     }
3255
3256     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3257     {
3258         list_remove(&surface->overlay_entry);
3259     }
3260
3261     if (flags & WINEDDOVER_SHOW)
3262     {
3263         if (surface->overlay_dest != dst_surface)
3264         {
3265             surface->overlay_dest = dst_surface;
3266             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3267         }
3268     }
3269     else if (flags & WINEDDOVER_HIDE)
3270     {
3271         /* tests show that the rectangles are erased on hide */
3272         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3273         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3274         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3275         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3276         surface->overlay_dest = NULL;
3277     }
3278
3279     surface->surface_ops->surface_draw_overlay(surface);
3280
3281     return WINED3D_OK;
3282 }
3283
3284 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3285 {
3286     TRACE("surface %p, clipper %p.\n", surface, clipper);
3287
3288     surface->clipper = clipper;
3289
3290     return WINED3D_OK;
3291 }
3292
3293 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3294 {
3295     TRACE("surface %p.\n", surface);
3296
3297     return surface->clipper;
3298 }
3299
3300 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3301 {
3302     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3303
3304     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3305
3306     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3307     {
3308         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3309         return WINED3DERR_INVALIDCALL;
3310     }
3311
3312     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3313             surface->pow2Width, surface->pow2Height);
3314     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3315     surface->resource.format = format;
3316
3317     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3318     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3319             format->glFormat, format->glInternal, format->glType);
3320
3321     return WINED3D_OK;
3322 }
3323
3324 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3325         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3326 {
3327     unsigned short *dst_s;
3328     const float *src_f;
3329     unsigned int x, y;
3330
3331     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3332
3333     for (y = 0; y < h; ++y)
3334     {
3335         src_f = (const float *)(src + y * pitch_in);
3336         dst_s = (unsigned short *) (dst + y * pitch_out);
3337         for (x = 0; x < w; ++x)
3338         {
3339             dst_s[x] = float_32_to_16(src_f + x);
3340         }
3341     }
3342 }
3343
3344 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3345         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3346 {
3347     static const unsigned char convert_5to8[] =
3348     {
3349         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3350         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3351         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3352         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3353     };
3354     static const unsigned char convert_6to8[] =
3355     {
3356         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3357         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3358         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3359         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3360         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3361         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3362         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3363         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3364     };
3365     unsigned int x, y;
3366
3367     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3368
3369     for (y = 0; y < h; ++y)
3370     {
3371         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3372         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3373         for (x = 0; x < w; ++x)
3374         {
3375             WORD pixel = src_line[x];
3376             dst_line[x] = 0xff000000
3377                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3378                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3379                     | convert_5to8[(pixel & 0x001f)];
3380         }
3381     }
3382 }
3383
3384 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3385  * in both cases we're just setting the X / Alpha channel to 0xff. */
3386 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3387         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3388 {
3389     unsigned int x, y;
3390
3391     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3392
3393     for (y = 0; y < h; ++y)
3394     {
3395         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3396         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3397
3398         for (x = 0; x < w; ++x)
3399         {
3400             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3401         }
3402     }
3403 }
3404
3405 static inline BYTE cliptobyte(int x)
3406 {
3407     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3408 }
3409
3410 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3411         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3412 {
3413     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3414     unsigned int x, y;
3415
3416     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3417
3418     for (y = 0; y < h; ++y)
3419     {
3420         const BYTE *src_line = src + y * pitch_in;
3421         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3422         for (x = 0; x < w; ++x)
3423         {
3424             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3425              *     C = Y - 16; D = U - 128; E = V - 128;
3426              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3427              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3428              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3429              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3430              * U and V are shared between the pixels. */
3431             if (!(x & 1)) /* For every even pixel, read new U and V. */
3432             {
3433                 d = (int) src_line[1] - 128;
3434                 e = (int) src_line[3] - 128;
3435                 r2 = 409 * e + 128;
3436                 g2 = - 100 * d - 208 * e + 128;
3437                 b2 = 516 * d + 128;
3438             }
3439             c2 = 298 * ((int) src_line[0] - 16);
3440             dst_line[x] = 0xff000000
3441                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3442                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3443                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3444                 /* Scale RGB values to 0..255 range,
3445                  * then clip them if still not in range (may be negative),
3446                  * then shift them within DWORD if necessary. */
3447             src_line += 2;
3448         }
3449     }
3450 }
3451
3452 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3453         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3454 {
3455     unsigned int x, y;
3456     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3457
3458     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3459
3460     for (y = 0; y < h; ++y)
3461     {
3462         const BYTE *src_line = src + y * pitch_in;
3463         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3464         for (x = 0; x < w; ++x)
3465         {
3466             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3467              *     C = Y - 16; D = U - 128; E = V - 128;
3468              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3469              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3470              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3471              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3472              * U and V are shared between the pixels. */
3473             if (!(x & 1)) /* For every even pixel, read new U and V. */
3474             {
3475                 d = (int) src_line[1] - 128;
3476                 e = (int) src_line[3] - 128;
3477                 r2 = 409 * e + 128;
3478                 g2 = - 100 * d - 208 * e + 128;
3479                 b2 = 516 * d + 128;
3480             }
3481             c2 = 298 * ((int) src_line[0] - 16);
3482             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3483                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3484                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3485                 /* Scale RGB values to 0..255 range,
3486                  * then clip them if still not in range (may be negative),
3487                  * then shift them within DWORD if necessary. */
3488             src_line += 2;
3489         }
3490     }
3491 }
3492
3493 struct d3dfmt_convertor_desc
3494 {
3495     enum wined3d_format_id from, to;
3496     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3497 };
3498
3499 static const struct d3dfmt_convertor_desc convertors[] =
3500 {
3501     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3502     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3503     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3504     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3505     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3506     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3507 };
3508
3509 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3510         enum wined3d_format_id to)
3511 {
3512     unsigned int i;
3513
3514     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3515     {
3516         if (convertors[i].from == from && convertors[i].to == to)
3517             return &convertors[i];
3518     }
3519
3520     return NULL;
3521 }
3522
3523 /*****************************************************************************
3524  * surface_convert_format
3525  *
3526  * Creates a duplicate of a surface in a different format. Is used by Blt to
3527  * blit between surfaces with different formats.
3528  *
3529  * Parameters
3530  *  source: Source surface
3531  *  fmt: Requested destination format
3532  *
3533  *****************************************************************************/
3534 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3535 {
3536     const struct d3dfmt_convertor_desc *conv;
3537     WINED3DLOCKED_RECT lock_src, lock_dst;
3538     struct wined3d_surface *ret = NULL;
3539     HRESULT hr;
3540
3541     conv = find_convertor(source->resource.format->id, to_fmt);
3542     if (!conv)
3543     {
3544         FIXME("Cannot find a conversion function from format %s to %s.\n",
3545                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3546         return NULL;
3547     }
3548
3549     wined3d_surface_create(source->resource.device, source->resource.width,
3550             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3551             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3552             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3553     if (!ret)
3554     {
3555         ERR("Failed to create a destination surface for conversion.\n");
3556         return NULL;
3557     }
3558
3559     memset(&lock_src, 0, sizeof(lock_src));
3560     memset(&lock_dst, 0, sizeof(lock_dst));
3561
3562     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3563     if (FAILED(hr))
3564     {
3565         ERR("Failed to lock the source surface.\n");
3566         wined3d_surface_decref(ret);
3567         return NULL;
3568     }
3569     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3570     if (FAILED(hr))
3571     {
3572         ERR("Failed to lock the destination surface.\n");
3573         wined3d_surface_unmap(source);
3574         wined3d_surface_decref(ret);
3575         return NULL;
3576     }
3577
3578     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3579             source->resource.width, source->resource.height);
3580
3581     wined3d_surface_unmap(ret);
3582     wined3d_surface_unmap(source);
3583
3584     return ret;
3585 }
3586
3587 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3588         unsigned int bpp, UINT pitch, DWORD color)
3589 {
3590     BYTE *first;
3591     int x, y;
3592
3593     /* Do first row */
3594
3595 #define COLORFILL_ROW(type) \
3596 do { \
3597     type *d = (type *)buf; \
3598     for (x = 0; x < width; ++x) \
3599         d[x] = (type)color; \
3600 } while(0)
3601
3602     switch (bpp)
3603     {
3604         case 1:
3605             COLORFILL_ROW(BYTE);
3606             break;
3607
3608         case 2:
3609             COLORFILL_ROW(WORD);
3610             break;
3611
3612         case 3:
3613         {
3614             BYTE *d = buf;
3615             for (x = 0; x < width; ++x, d += 3)
3616             {
3617                 d[0] = (color      ) & 0xFF;
3618                 d[1] = (color >>  8) & 0xFF;
3619                 d[2] = (color >> 16) & 0xFF;
3620             }
3621             break;
3622         }
3623         case 4:
3624             COLORFILL_ROW(DWORD);
3625             break;
3626
3627         default:
3628             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3629             return WINED3DERR_NOTAVAILABLE;
3630     }
3631
3632 #undef COLORFILL_ROW
3633
3634     /* Now copy first row. */
3635     first = buf;
3636     for (y = 1; y < height; ++y)
3637     {
3638         buf += pitch;
3639         memcpy(buf, first, width * bpp);
3640     }
3641
3642     return WINED3D_OK;
3643 }
3644
3645 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3646 {
3647     TRACE("surface %p.\n", surface);
3648
3649     if (!(surface->flags & SFLAG_LOCKED))
3650     {
3651         WARN("Trying to unmap unmapped surface.\n");
3652         return WINEDDERR_NOTLOCKED;
3653     }
3654     surface->flags &= ~SFLAG_LOCKED;
3655
3656     surface->surface_ops->surface_unmap(surface);
3657
3658     return WINED3D_OK;
3659 }
3660
3661 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3662         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3663 {
3664     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3665             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3666
3667     if (surface->flags & SFLAG_LOCKED)
3668     {
3669         WARN("Surface is already mapped.\n");
3670         return WINED3DERR_INVALIDCALL;
3671     }
3672     surface->flags |= SFLAG_LOCKED;
3673
3674     if (!(surface->flags & SFLAG_LOCKABLE))
3675         WARN("Trying to lock unlockable surface.\n");
3676
3677     surface->surface_ops->surface_map(surface, rect, flags);
3678
3679     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3680
3681     if (!rect)
3682     {
3683         locked_rect->pBits = surface->resource.allocatedMemory;
3684         surface->lockedRect.left = 0;
3685         surface->lockedRect.top = 0;
3686         surface->lockedRect.right = surface->resource.width;
3687         surface->lockedRect.bottom = surface->resource.height;
3688     }
3689     else
3690     {
3691         const struct wined3d_format *format = surface->resource.format;
3692
3693         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3694         {
3695             /* Compressed textures are block based, so calculate the offset of
3696              * the block that contains the top-left pixel of the locked rectangle. */
3697             locked_rect->pBits = surface->resource.allocatedMemory
3698                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3699                     + ((rect->left / format->block_width) * format->block_byte_count);
3700         }
3701         else
3702         {
3703             locked_rect->pBits = surface->resource.allocatedMemory
3704                     + (locked_rect->Pitch * rect->top)
3705                     + (rect->left * format->byte_count);
3706         }
3707         surface->lockedRect.left = rect->left;
3708         surface->lockedRect.top = rect->top;
3709         surface->lockedRect.right = rect->right;
3710         surface->lockedRect.bottom = rect->bottom;
3711     }
3712
3713     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3714     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3715
3716     return WINED3D_OK;
3717 }
3718
3719 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3720 {
3721     HRESULT hr;
3722
3723     TRACE("surface %p, dc %p.\n", surface, dc);
3724
3725     if (surface->flags & SFLAG_USERPTR)
3726     {
3727         ERR("Not supported on surfaces with application-provided memory.\n");
3728         return WINEDDERR_NODC;
3729     }
3730
3731     /* Give more detailed info for ddraw. */
3732     if (surface->flags & SFLAG_DCINUSE)
3733         return WINEDDERR_DCALREADYCREATED;
3734
3735     /* Can't GetDC if the surface is locked. */
3736     if (surface->flags & SFLAG_LOCKED)
3737         return WINED3DERR_INVALIDCALL;
3738
3739     hr = surface->surface_ops->surface_getdc(surface);
3740     if (FAILED(hr))
3741         return hr;
3742
3743     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3744             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3745     {
3746         /* GetDC on palettized formats is unsupported in D3D9, and the method
3747          * is missing in D3D8, so this should only be used for DX <=7
3748          * surfaces (with non-device palettes). */
3749         const PALETTEENTRY *pal = NULL;
3750
3751         if (surface->palette)
3752         {
3753             pal = surface->palette->palents;
3754         }
3755         else
3756         {
3757             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3758             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3759
3760             if (dds_primary && dds_primary->palette)
3761                 pal = dds_primary->palette->palents;
3762         }
3763
3764         if (pal)
3765         {
3766             RGBQUAD col[256];
3767             unsigned int i;
3768
3769             for (i = 0; i < 256; ++i)
3770             {
3771                 col[i].rgbRed = pal[i].peRed;
3772                 col[i].rgbGreen = pal[i].peGreen;
3773                 col[i].rgbBlue = pal[i].peBlue;
3774                 col[i].rgbReserved = 0;
3775             }
3776             SetDIBColorTable(surface->hDC, 0, 256, col);
3777         }
3778     }
3779
3780     surface->flags |= SFLAG_DCINUSE;
3781
3782     *dc = surface->hDC;
3783     TRACE("Returning dc %p.\n", *dc);
3784
3785     return WINED3D_OK;
3786 }
3787
3788 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3789 {
3790     TRACE("surface %p, dc %p.\n", surface, dc);
3791
3792     if (!(surface->flags & SFLAG_DCINUSE))
3793         return WINEDDERR_NODC;
3794
3795     if (surface->hDC != dc)
3796     {
3797         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3798                 dc, surface->hDC);
3799         return WINEDDERR_NODC;
3800     }
3801
3802     /* Copy the contents of the DIB over to the PBO. */
3803     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3804         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3805
3806     /* We locked first, so unlock now. */
3807     wined3d_surface_unmap(surface);
3808
3809     surface->flags &= ~SFLAG_DCINUSE;
3810
3811     return WINED3D_OK;
3812 }
3813
3814 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3815 {
3816     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3817
3818     if (flags)
3819     {
3820         static UINT once;
3821         if (!once++)
3822             FIXME("Ignoring flags %#x.\n", flags);
3823         else
3824             WARN("Ignoring flags %#x.\n", flags);
3825     }
3826
3827     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3828     {
3829         ERR("Not supported on swapchain surfaces.\n");
3830         return WINEDDERR_NOTFLIPPABLE;
3831     }
3832
3833     /* Flipping is only supported on render targets and overlays. */
3834     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3835     {
3836         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3837         return WINEDDERR_NOTFLIPPABLE;
3838     }
3839
3840     flip_surface(surface, override);
3841
3842     /* Update overlays if they're visible. */
3843     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3844         return surface->surface_ops->surface_draw_overlay(surface);
3845
3846     return WINED3D_OK;
3847 }
3848
3849 /* Do not call while under the GL lock. */
3850 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3851 {
3852     struct wined3d_device *device = surface->resource.device;
3853
3854     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3855
3856     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3857     {
3858         struct wined3d_texture *texture = surface->container.u.texture;
3859
3860         TRACE("Passing to container (%p).\n", texture);
3861         texture->texture_ops->texture_preload(texture, srgb);
3862     }
3863     else
3864     {
3865         struct wined3d_context *context;
3866
3867         TRACE("(%p) : About to load surface\n", surface);
3868
3869         /* TODO: Use already acquired context when possible. */
3870         context = context_acquire(device, NULL);
3871
3872         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3873                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3874         {
3875             if (palette9_changed(surface))
3876             {
3877                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3878                 /* TODO: This is not necessarily needed with hw palettized texture support */
3879                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3880                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3881                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3882             }
3883         }
3884
3885         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3886
3887         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3888         {
3889             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3890             GLclampf tmp;
3891             tmp = 0.9f;
3892             ENTER_GL();
3893             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3894             LEAVE_GL();
3895         }
3896
3897         context_release(context);
3898     }
3899 }
3900
3901 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3902 {
3903     if (!surface->resource.allocatedMemory)
3904     {
3905         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3906                 surface->resource.size + RESOURCE_ALIGNMENT);
3907         if (!surface->resource.heapMemory)
3908         {
3909             ERR("Out of memory\n");
3910             return FALSE;
3911         }
3912         surface->resource.allocatedMemory =
3913             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3914     }
3915     else
3916     {
3917         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3918     }
3919
3920     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3921
3922     return TRUE;
3923 }
3924
3925 /* Read the framebuffer back into the surface */
3926 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3927 {
3928     struct wined3d_device *device = surface->resource.device;
3929     const struct wined3d_gl_info *gl_info;
3930     struct wined3d_context *context;
3931     BYTE *mem;
3932     GLint fmt;
3933     GLint type;
3934     BYTE *row, *top, *bottom;
3935     int i;
3936     BOOL bpp;
3937     RECT local_rect;
3938     BOOL srcIsUpsideDown;
3939     GLint rowLen = 0;
3940     GLint skipPix = 0;
3941     GLint skipRow = 0;
3942
3943     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3944         static BOOL warned = FALSE;
3945         if(!warned) {
3946             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3947             warned = TRUE;
3948         }
3949         return;
3950     }
3951
3952     context = context_acquire(device, surface);
3953     context_apply_blit_state(context, device);
3954     gl_info = context->gl_info;
3955
3956     ENTER_GL();
3957
3958     /* Select the correct read buffer, and give some debug output.
3959      * There is no need to keep track of the current read buffer or reset it, every part of the code
3960      * that reads sets the read buffer as desired.
3961      */
3962     if (surface_is_offscreen(surface))
3963     {
3964         /* Mapping the primary render target which is not on a swapchain.
3965          * Read from the back buffer. */
3966         TRACE("Mapping offscreen render target.\n");
3967         glReadBuffer(device->offscreenBuffer);
3968         srcIsUpsideDown = TRUE;
3969     }
3970     else
3971     {
3972         /* Onscreen surfaces are always part of a swapchain */
3973         GLenum buffer = surface_get_gl_buffer(surface);
3974         TRACE("Mapping %#x buffer.\n", buffer);
3975         glReadBuffer(buffer);
3976         checkGLcall("glReadBuffer");
3977         srcIsUpsideDown = FALSE;
3978     }
3979
3980     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3981     if (!rect)
3982     {
3983         local_rect.left = 0;
3984         local_rect.top = 0;
3985         local_rect.right = surface->resource.width;
3986         local_rect.bottom = surface->resource.height;
3987     }
3988     else
3989     {
3990         local_rect = *rect;
3991     }
3992     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3993
3994     switch (surface->resource.format->id)
3995     {
3996         case WINED3DFMT_P8_UINT:
3997         {
3998             if (primary_render_target_is_p8(device))
3999             {
4000                 /* In case of P8 render targets the index is stored in the alpha component */
4001                 fmt = GL_ALPHA;
4002                 type = GL_UNSIGNED_BYTE;
4003                 mem = dest;
4004                 bpp = surface->resource.format->byte_count;
4005             }
4006             else
4007             {
4008                 /* GL can't return palettized data, so read ARGB pixels into a
4009                  * separate block of memory and convert them into palettized format
4010                  * in software. Slow, but if the app means to use palettized render
4011                  * targets and locks it...
4012                  *
4013                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4014                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4015                  * for the color channels when palettizing the colors.
4016                  */
4017                 fmt = GL_RGB;
4018                 type = GL_UNSIGNED_BYTE;
4019                 pitch *= 3;
4020                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4021                 if (!mem)
4022                 {
4023                     ERR("Out of memory\n");
4024                     LEAVE_GL();
4025                     return;
4026                 }
4027                 bpp = surface->resource.format->byte_count * 3;
4028             }
4029         }
4030         break;
4031
4032         default:
4033             mem = dest;
4034             fmt = surface->resource.format->glFormat;
4035             type = surface->resource.format->glType;
4036             bpp = surface->resource.format->byte_count;
4037     }
4038
4039     if (surface->flags & SFLAG_PBO)
4040     {
4041         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4042         checkGLcall("glBindBufferARB");
4043         if (mem)
4044         {
4045             ERR("mem not null for pbo -- unexpected\n");
4046             mem = NULL;
4047         }
4048     }
4049
4050     /* Save old pixel store pack state */
4051     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4052     checkGLcall("glGetIntegerv");
4053     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4054     checkGLcall("glGetIntegerv");
4055     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4056     checkGLcall("glGetIntegerv");
4057
4058     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4059     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4060     checkGLcall("glPixelStorei");
4061     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4062     checkGLcall("glPixelStorei");
4063     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4064     checkGLcall("glPixelStorei");
4065
4066     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4067             local_rect.right - local_rect.left,
4068             local_rect.bottom - local_rect.top,
4069             fmt, type, mem);
4070     checkGLcall("glReadPixels");
4071
4072     /* Reset previous pixel store pack state */
4073     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4074     checkGLcall("glPixelStorei");
4075     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4076     checkGLcall("glPixelStorei");
4077     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4078     checkGLcall("glPixelStorei");
4079
4080     if (surface->flags & SFLAG_PBO)
4081     {
4082         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4083         checkGLcall("glBindBufferARB");
4084
4085         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4086          * to get a pointer to it and perform the flipping in software. This is a lot
4087          * faster than calling glReadPixels for each line. In case we want more speed
4088          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4089         if (!srcIsUpsideDown)
4090         {
4091             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4092             checkGLcall("glBindBufferARB");
4093
4094             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4095             checkGLcall("glMapBufferARB");
4096         }
4097     }
4098
4099     /* TODO: Merge this with the palettization loop below for P8 targets */
4100     if(!srcIsUpsideDown) {
4101         UINT len, off;
4102         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4103             Flip the lines in software */
4104         len = (local_rect.right - local_rect.left) * bpp;
4105         off = local_rect.left * bpp;
4106
4107         row = HeapAlloc(GetProcessHeap(), 0, len);
4108         if(!row) {
4109             ERR("Out of memory\n");
4110             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4111                 HeapFree(GetProcessHeap(), 0, mem);
4112             LEAVE_GL();
4113             return;
4114         }
4115
4116         top = mem + pitch * local_rect.top;
4117         bottom = mem + pitch * (local_rect.bottom - 1);
4118         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4119             memcpy(row, top + off, len);
4120             memcpy(top + off, bottom + off, len);
4121             memcpy(bottom + off, row, len);
4122             top += pitch;
4123             bottom -= pitch;
4124         }
4125         HeapFree(GetProcessHeap(), 0, row);
4126
4127         /* Unmap the temp PBO buffer */
4128         if (surface->flags & SFLAG_PBO)
4129         {
4130             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4131             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4132         }
4133     }
4134
4135     LEAVE_GL();
4136     context_release(context);
4137
4138     /* For P8 textures we need to perform an inverse palette lookup. This is
4139      * done by searching for a palette index which matches the RGB value.
4140      * Note this isn't guaranteed to work when there are multiple entries for
4141      * the same color but we have no choice. In case of P8 render targets,
4142      * the index is stored in the alpha component so no conversion is needed. */
4143     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4144     {
4145         const PALETTEENTRY *pal = NULL;
4146         DWORD width = pitch / 3;
4147         int x, y, c;
4148
4149         if (surface->palette)
4150         {
4151             pal = surface->palette->palents;
4152         }
4153         else
4154         {
4155             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4156             HeapFree(GetProcessHeap(), 0, mem);
4157             return;
4158         }
4159
4160         for(y = local_rect.top; y < local_rect.bottom; y++) {
4161             for(x = local_rect.left; x < local_rect.right; x++) {
4162                 /*                      start              lines            pixels      */
4163                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4164                 const BYTE *green = blue  + 1;
4165                 const BYTE *red = green + 1;
4166
4167                 for(c = 0; c < 256; c++) {
4168                     if(*red   == pal[c].peRed   &&
4169                        *green == pal[c].peGreen &&
4170                        *blue  == pal[c].peBlue)
4171                     {
4172                         *((BYTE *) dest + y * width + x) = c;
4173                         break;
4174                     }
4175                 }
4176             }
4177         }
4178         HeapFree(GetProcessHeap(), 0, mem);
4179     }
4180 }
4181
4182 /* Read the framebuffer contents into a texture */
4183 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
4184 {
4185     struct wined3d_device *device = surface->resource.device;
4186     struct wined3d_context *context;
4187
4188     if (!surface_is_offscreen(surface))
4189     {
4190         /* We would need to flip onscreen surfaces, but there's no efficient
4191          * way to do that here. It makes more sense for the caller to
4192          * explicitly go through sysmem. */
4193         ERR("Not supported for onscreen targets.\n");
4194         return;
4195     }
4196
4197     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
4198      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
4199      * states in the stateblock, and no driver was found yet that had bugs in that regard.
4200      */
4201     context = context_acquire(device, surface);
4202     device_invalidate_state(device, STATE_FRAMEBUFFER);
4203
4204     surface_prepare_texture(surface, context, srgb);
4205     surface_bind_and_dirtify(surface, context, srgb);
4206
4207     TRACE("Reading back offscreen render target %p.\n", surface);
4208
4209     ENTER_GL();
4210
4211     glReadBuffer(device->offscreenBuffer);
4212     checkGLcall("glReadBuffer");
4213
4214     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4215             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4216     checkGLcall("glCopyTexSubImage2D");
4217
4218     LEAVE_GL();
4219
4220     context_release(context);
4221 }
4222
4223 /* Context activation is done by the caller. */
4224 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4225         struct wined3d_context *context, BOOL srgb)
4226 {
4227     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4228     CONVERT_TYPES convert;
4229     struct wined3d_format format;
4230
4231     if (surface->flags & alloc_flag) return;
4232
4233     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4234     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4235     else surface->flags &= ~SFLAG_CONVERTED;
4236
4237     surface_bind_and_dirtify(surface, context, srgb);
4238     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4239     surface->flags |= alloc_flag;
4240 }
4241
4242 /* Context activation is done by the caller. */
4243 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4244 {
4245     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4246     {
4247         struct wined3d_texture *texture = surface->container.u.texture;
4248         UINT sub_count = texture->level_count * texture->layer_count;
4249         UINT i;
4250
4251         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4252
4253         for (i = 0; i < sub_count; ++i)
4254         {
4255             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4256             surface_prepare_texture_internal(s, context, srgb);
4257         }
4258
4259         return;
4260     }
4261
4262     surface_prepare_texture_internal(surface, context, srgb);
4263 }
4264
4265 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4266 {
4267     if (multisample)
4268     {
4269         if (surface->rb_multisample)
4270             return;
4271
4272         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4273         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4274         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4275                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4276         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4277     }
4278     else
4279     {
4280         if (surface->rb_resolved)
4281             return;
4282
4283         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4284         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4285         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4286                 surface->pow2Width, surface->pow2Height);
4287         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4288     }
4289 }
4290
4291 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4292         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4293 {
4294     struct wined3d_device *device = surface->resource.device;
4295     UINT pitch = wined3d_surface_get_pitch(surface);
4296     const struct wined3d_gl_info *gl_info;
4297     struct wined3d_context *context;
4298     RECT local_rect;
4299     UINT w, h;
4300
4301     surface_get_rect(surface, rect, &local_rect);
4302
4303     mem += local_rect.top * pitch + local_rect.left * bpp;
4304     w = local_rect.right - local_rect.left;
4305     h = local_rect.bottom - local_rect.top;
4306
4307     /* Activate the correct context for the render target */
4308     context = context_acquire(device, surface);
4309     context_apply_blit_state(context, device);
4310     gl_info = context->gl_info;
4311
4312     ENTER_GL();
4313
4314     if (!surface_is_offscreen(surface))
4315     {
4316         GLenum buffer = surface_get_gl_buffer(surface);
4317         TRACE("Unlocking %#x buffer.\n", buffer);
4318         context_set_draw_buffer(context, buffer);
4319
4320         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4321         glPixelZoom(1.0f, -1.0f);
4322     }
4323     else
4324     {
4325         /* Primary offscreen render target */
4326         TRACE("Offscreen render target.\n");
4327         context_set_draw_buffer(context, device->offscreenBuffer);
4328
4329         glPixelZoom(1.0f, 1.0f);
4330     }
4331
4332     glRasterPos3i(local_rect.left, local_rect.top, 1);
4333     checkGLcall("glRasterPos3i");
4334
4335     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4336     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4337
4338     if (surface->flags & SFLAG_PBO)
4339     {
4340         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4341         checkGLcall("glBindBufferARB");
4342     }
4343
4344     glDrawPixels(w, h, fmt, type, mem);
4345     checkGLcall("glDrawPixels");
4346
4347     if (surface->flags & SFLAG_PBO)
4348     {
4349         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4350         checkGLcall("glBindBufferARB");
4351     }
4352
4353     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4354     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4355
4356     LEAVE_GL();
4357
4358     if (wined3d_settings.strict_draw_ordering
4359             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4360             && surface->container.u.swapchain->front_buffer == surface))
4361         wglFlush();
4362
4363     context_release(context);
4364 }
4365
4366 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4367         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4368 {
4369     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4370     const struct wined3d_device *device = surface->resource.device;
4371     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4372     BOOL blit_supported = FALSE;
4373
4374     /* Copy the default values from the surface. Below we might perform fixups */
4375     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4376     *format = *surface->resource.format;
4377     *convert = NO_CONVERSION;
4378
4379     /* Ok, now look if we have to do any conversion */
4380     switch (surface->resource.format->id)
4381     {
4382         case WINED3DFMT_P8_UINT:
4383             /* Below the call to blit_supported is disabled for Wine 1.2
4384              * because the function isn't operating correctly yet. At the
4385              * moment 8-bit blits are handled in software and if certain GL
4386              * extensions are around, surface conversion is performed at
4387              * upload time. The blit_supported call recognizes it as a
4388              * destination fixup. This type of upload 'fixup' and 8-bit to
4389              * 8-bit blits need to be handled by the blit_shader.
4390              * TODO: get rid of this #if 0. */
4391 #if 0
4392             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4393                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4394                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4395 #endif
4396             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4397
4398             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4399              * texturing. Further also use conversion in case of color keying.
4400              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4401              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4402              * conflicts with this.
4403              */
4404             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4405                     || colorkey_active || !use_texturing)
4406             {
4407                 format->glFormat = GL_RGBA;
4408                 format->glInternal = GL_RGBA;
4409                 format->glType = GL_UNSIGNED_BYTE;
4410                 format->conv_byte_count = 4;
4411                 if (colorkey_active)
4412                     *convert = CONVERT_PALETTED_CK;
4413                 else
4414                     *convert = CONVERT_PALETTED;
4415             }
4416             break;
4417
4418         case WINED3DFMT_B2G3R3_UNORM:
4419             /* **********************
4420                 GL_UNSIGNED_BYTE_3_3_2
4421                 ********************** */
4422             if (colorkey_active) {
4423                 /* This texture format will never be used.. So do not care about color keying
4424                     up until the point in time it will be needed :-) */
4425                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4426             }
4427             break;
4428
4429         case WINED3DFMT_B5G6R5_UNORM:
4430             if (colorkey_active)
4431             {
4432                 *convert = CONVERT_CK_565;
4433                 format->glFormat = GL_RGBA;
4434                 format->glInternal = GL_RGB5_A1;
4435                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4436                 format->conv_byte_count = 2;
4437             }
4438             break;
4439
4440         case WINED3DFMT_B5G5R5X1_UNORM:
4441             if (colorkey_active)
4442             {
4443                 *convert = CONVERT_CK_5551;
4444                 format->glFormat = GL_BGRA;
4445                 format->glInternal = GL_RGB5_A1;
4446                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4447                 format->conv_byte_count = 2;
4448             }
4449             break;
4450
4451         case WINED3DFMT_B8G8R8_UNORM:
4452             if (colorkey_active)
4453             {
4454                 *convert = CONVERT_CK_RGB24;
4455                 format->glFormat = GL_RGBA;
4456                 format->glInternal = GL_RGBA8;
4457                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4458                 format->conv_byte_count = 4;
4459             }
4460             break;
4461
4462         case WINED3DFMT_B8G8R8X8_UNORM:
4463             if (colorkey_active)
4464             {
4465                 *convert = CONVERT_RGB32_888;
4466                 format->glFormat = GL_RGBA;
4467                 format->glInternal = GL_RGBA8;
4468                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4469                 format->conv_byte_count = 4;
4470             }
4471             break;
4472
4473         default:
4474             break;
4475     }
4476
4477     return WINED3D_OK;
4478 }
4479
4480 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4481 {
4482     const struct wined3d_device *device = surface->resource.device;
4483     const struct wined3d_palette *pal = surface->palette;
4484     BOOL index_in_alpha = FALSE;
4485     unsigned int i;
4486
4487     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4488      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4489      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4490      * duplicate entries. Store the color key in the unused alpha component to speed the
4491      * download up and to make conversion unneeded. */
4492     index_in_alpha = primary_render_target_is_p8(device);
4493
4494     if (!pal)
4495     {
4496         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4497         if (device->wined3d->flags & WINED3D_PALETTE_PER_SURFACE)
4498         {
4499             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4500             if (index_in_alpha)
4501             {
4502                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4503                  * there's no palette at this time. */
4504                 for (i = 0; i < 256; i++) table[i][3] = i;
4505             }
4506         }
4507         else
4508         {
4509             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4510              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4511              * capability flag is present (wine does advertise this capability) */
4512             for (i = 0; i < 256; ++i)
4513             {
4514                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4515                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4516                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4517                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4518             }
4519         }
4520     }
4521     else
4522     {
4523         TRACE("Using surface palette %p\n", pal);
4524         /* Get the surface's palette */
4525         for (i = 0; i < 256; ++i)
4526         {
4527             table[i][0] = pal->palents[i].peRed;
4528             table[i][1] = pal->palents[i].peGreen;
4529             table[i][2] = pal->palents[i].peBlue;
4530
4531             /* When index_in_alpha is set the palette index is stored in the
4532              * alpha component. In case of a readback we can then read
4533              * GL_ALPHA. Color keying is handled in BltOverride using a
4534              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4535              * color key itself is passed to glAlphaFunc in other cases the
4536              * alpha component of pixels that should be masked away is set to 0. */
4537             if (index_in_alpha)
4538             {
4539                 table[i][3] = i;
4540             }
4541             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4542                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4543             {
4544                 table[i][3] = 0x00;
4545             }
4546             else if (pal->flags & WINEDDPCAPS_ALPHA)
4547             {
4548                 table[i][3] = pal->palents[i].peFlags;
4549             }
4550             else
4551             {
4552                 table[i][3] = 0xFF;
4553             }
4554         }
4555     }
4556 }
4557
4558 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4559         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4560 {
4561     const BYTE *source;
4562     BYTE *dest;
4563     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4564
4565     switch (convert) {
4566         case NO_CONVERSION:
4567         {
4568             memcpy(dst, src, pitch * height);
4569             break;
4570         }
4571         case CONVERT_PALETTED:
4572         case CONVERT_PALETTED_CK:
4573         {
4574             BYTE table[256][4];
4575             unsigned int x, y;
4576
4577             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4578
4579             for (y = 0; y < height; y++)
4580             {
4581                 source = src + pitch * y;
4582                 dest = dst + outpitch * y;
4583                 /* This is an 1 bpp format, using the width here is fine */
4584                 for (x = 0; x < width; x++) {
4585                     BYTE color = *source++;
4586                     *dest++ = table[color][0];
4587                     *dest++ = table[color][1];
4588                     *dest++ = table[color][2];
4589                     *dest++ = table[color][3];
4590                 }
4591             }
4592         }
4593         break;
4594
4595         case CONVERT_CK_565:
4596         {
4597             /* Converting the 565 format in 5551 packed to emulate color-keying.
4598
4599               Note : in all these conversion, it would be best to average the averaging
4600                       pixels to get the color of the pixel that will be color-keyed to
4601                       prevent 'color bleeding'. This will be done later on if ever it is
4602                       too visible.
4603
4604               Note2: Nvidia documents say that their driver does not support alpha + color keying
4605                      on the same surface and disables color keying in such a case
4606             */
4607             unsigned int x, y;
4608             const WORD *Source;
4609             WORD *Dest;
4610
4611             TRACE("Color keyed 565\n");
4612
4613             for (y = 0; y < height; y++) {
4614                 Source = (const WORD *)(src + y * pitch);
4615                 Dest = (WORD *) (dst + y * outpitch);
4616                 for (x = 0; x < width; x++ ) {
4617                     WORD color = *Source++;
4618                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4619                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4620                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4621                         *Dest |= 0x0001;
4622                     Dest++;
4623                 }
4624             }
4625         }
4626         break;
4627
4628         case CONVERT_CK_5551:
4629         {
4630             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4631             unsigned int x, y;
4632             const WORD *Source;
4633             WORD *Dest;
4634             TRACE("Color keyed 5551\n");
4635             for (y = 0; y < height; y++) {
4636                 Source = (const WORD *)(src + y * pitch);
4637                 Dest = (WORD *) (dst + y * outpitch);
4638                 for (x = 0; x < width; x++ ) {
4639                     WORD color = *Source++;
4640                     *Dest = color;
4641                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4642                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4643                         *Dest |= (1 << 15);
4644                     else
4645                         *Dest &= ~(1 << 15);
4646                     Dest++;
4647                 }
4648             }
4649         }
4650         break;
4651
4652         case CONVERT_CK_RGB24:
4653         {
4654             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4655             unsigned int x, y;
4656             for (y = 0; y < height; y++)
4657             {
4658                 source = src + pitch * y;
4659                 dest = dst + outpitch * y;
4660                 for (x = 0; x < width; x++) {
4661                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4662                     DWORD dstcolor = color << 8;
4663                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4664                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4665                         dstcolor |= 0xff;
4666                     *(DWORD*)dest = dstcolor;
4667                     source += 3;
4668                     dest += 4;
4669                 }
4670             }
4671         }
4672         break;
4673
4674         case CONVERT_RGB32_888:
4675         {
4676             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4677             unsigned int x, y;
4678             for (y = 0; y < height; y++)
4679             {
4680                 source = src + pitch * y;
4681                 dest = dst + outpitch * y;
4682                 for (x = 0; x < width; x++) {
4683                     DWORD color = 0xffffff & *(const DWORD*)source;
4684                     DWORD dstcolor = color << 8;
4685                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4686                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4687                         dstcolor |= 0xff;
4688                     *(DWORD*)dest = dstcolor;
4689                     source += 4;
4690                     dest += 4;
4691                 }
4692             }
4693         }
4694         break;
4695
4696         default:
4697             ERR("Unsupported conversion type %#x.\n", convert);
4698     }
4699     return WINED3D_OK;
4700 }
4701
4702 BOOL palette9_changed(struct wined3d_surface *surface)
4703 {
4704     struct wined3d_device *device = surface->resource.device;
4705
4706     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4707             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4708     {
4709         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4710          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4711          */
4712         return FALSE;
4713     }
4714
4715     if (surface->palette9)
4716     {
4717         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4718         {
4719             return FALSE;
4720         }
4721     }
4722     else
4723     {
4724         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4725     }
4726     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4727
4728     return TRUE;
4729 }
4730
4731 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4732 {
4733     /* Flip the surface contents */
4734     /* Flip the DC */
4735     {
4736         HDC tmp;
4737         tmp = front->hDC;
4738         front->hDC = back->hDC;
4739         back->hDC = tmp;
4740     }
4741
4742     /* Flip the DIBsection */
4743     {
4744         HBITMAP tmp;
4745         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4746         tmp = front->dib.DIBsection;
4747         front->dib.DIBsection = back->dib.DIBsection;
4748         back->dib.DIBsection = tmp;
4749
4750         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4751         else front->flags &= ~SFLAG_DIBSECTION;
4752         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4753         else back->flags &= ~SFLAG_DIBSECTION;
4754     }
4755
4756     /* Flip the surface data */
4757     {
4758         void* tmp;
4759
4760         tmp = front->dib.bitmap_data;
4761         front->dib.bitmap_data = back->dib.bitmap_data;
4762         back->dib.bitmap_data = tmp;
4763
4764         tmp = front->resource.allocatedMemory;
4765         front->resource.allocatedMemory = back->resource.allocatedMemory;
4766         back->resource.allocatedMemory = tmp;
4767
4768         tmp = front->resource.heapMemory;
4769         front->resource.heapMemory = back->resource.heapMemory;
4770         back->resource.heapMemory = tmp;
4771     }
4772
4773     /* Flip the PBO */
4774     {
4775         GLuint tmp_pbo = front->pbo;
4776         front->pbo = back->pbo;
4777         back->pbo = tmp_pbo;
4778     }
4779
4780     /* client_memory should not be different, but just in case */
4781     {
4782         BOOL tmp;
4783         tmp = front->dib.client_memory;
4784         front->dib.client_memory = back->dib.client_memory;
4785         back->dib.client_memory = tmp;
4786     }
4787
4788     /* Flip the opengl texture */
4789     {
4790         GLuint tmp;
4791
4792         tmp = back->texture_name;
4793         back->texture_name = front->texture_name;
4794         front->texture_name = tmp;
4795
4796         tmp = back->texture_name_srgb;
4797         back->texture_name_srgb = front->texture_name_srgb;
4798         front->texture_name_srgb = tmp;
4799
4800         tmp = back->rb_multisample;
4801         back->rb_multisample = front->rb_multisample;
4802         front->rb_multisample = tmp;
4803
4804         tmp = back->rb_resolved;
4805         back->rb_resolved = front->rb_resolved;
4806         front->rb_resolved = tmp;
4807
4808         resource_unload(&back->resource);
4809         resource_unload(&front->resource);
4810     }
4811
4812     {
4813         DWORD tmp_flags = back->flags;
4814         back->flags = front->flags;
4815         front->flags = tmp_flags;
4816     }
4817 }
4818
4819 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4820  * pixel copy calls. */
4821 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4822         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4823 {
4824     struct wined3d_device *device = dst_surface->resource.device;
4825     float xrel, yrel;
4826     UINT row;
4827     struct wined3d_context *context;
4828     BOOL upsidedown = FALSE;
4829     RECT dst_rect = *dst_rect_in;
4830
4831     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4832      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4833      */
4834     if(dst_rect.top > dst_rect.bottom) {
4835         UINT tmp = dst_rect.bottom;
4836         dst_rect.bottom = dst_rect.top;
4837         dst_rect.top = tmp;
4838         upsidedown = TRUE;
4839     }
4840
4841     context = context_acquire(device, src_surface);
4842     context_apply_blit_state(context, device);
4843     surface_internal_preload(dst_surface, SRGB_RGB);
4844     ENTER_GL();
4845
4846     /* Bind the target texture */
4847     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4848     if (surface_is_offscreen(src_surface))
4849     {
4850         TRACE("Reading from an offscreen target\n");
4851         upsidedown = !upsidedown;
4852         glReadBuffer(device->offscreenBuffer);
4853     }
4854     else
4855     {
4856         glReadBuffer(surface_get_gl_buffer(src_surface));
4857     }
4858     checkGLcall("glReadBuffer");
4859
4860     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4861     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4862
4863     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4864     {
4865         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4866
4867         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4868             ERR("Texture filtering not supported in direct blit\n");
4869         }
4870     }
4871     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4872             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4873     {
4874         ERR("Texture filtering not supported in direct blit\n");
4875     }
4876
4877     if (upsidedown
4878             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4879             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4880     {
4881         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4882
4883         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4884                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4885                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4886                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4887     }
4888     else
4889     {
4890         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4891         /* I have to process this row by row to swap the image,
4892          * otherwise it would be upside down, so stretching in y direction
4893          * doesn't cost extra time
4894          *
4895          * However, stretching in x direction can be avoided if not necessary
4896          */
4897         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4898             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4899             {
4900                 /* Well, that stuff works, but it's very slow.
4901                  * find a better way instead
4902                  */
4903                 UINT col;
4904
4905                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4906                 {
4907                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4908                             dst_rect.left + col /* x offset */, row /* y offset */,
4909                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4910                 }
4911             }
4912             else
4913             {
4914                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4915                         dst_rect.left /* x offset */, row /* y offset */,
4916                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4917             }
4918         }
4919     }
4920     checkGLcall("glCopyTexSubImage2D");
4921
4922     LEAVE_GL();
4923     context_release(context);
4924
4925     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4926      * path is never entered
4927      */
4928     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4929 }
4930
4931 /* Uses the hardware to stretch and flip the image */
4932 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4933         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4934 {
4935     struct wined3d_device *device = dst_surface->resource.device;
4936     struct wined3d_swapchain *src_swapchain = NULL;
4937     GLuint src, backup = 0;
4938     float left, right, top, bottom; /* Texture coordinates */
4939     UINT fbwidth = src_surface->resource.width;
4940     UINT fbheight = src_surface->resource.height;
4941     struct wined3d_context *context;
4942     GLenum drawBuffer = GL_BACK;
4943     GLenum texture_target;
4944     BOOL noBackBufferBackup;
4945     BOOL src_offscreen;
4946     BOOL upsidedown = FALSE;
4947     RECT dst_rect = *dst_rect_in;
4948
4949     TRACE("Using hwstretch blit\n");
4950     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4951     context = context_acquire(device, src_surface);
4952     context_apply_blit_state(context, device);
4953     surface_internal_preload(dst_surface, SRGB_RGB);
4954
4955     src_offscreen = surface_is_offscreen(src_surface);
4956     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4957     if (!noBackBufferBackup && !src_surface->texture_name)
4958     {
4959         /* Get it a description */
4960         surface_internal_preload(src_surface, SRGB_RGB);
4961     }
4962     ENTER_GL();
4963
4964     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4965      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4966      */
4967     if (context->aux_buffers >= 2)
4968     {
4969         /* Got more than one aux buffer? Use the 2nd aux buffer */
4970         drawBuffer = GL_AUX1;
4971     }
4972     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4973     {
4974         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4975         drawBuffer = GL_AUX0;
4976     }
4977
4978     if(noBackBufferBackup) {
4979         glGenTextures(1, &backup);
4980         checkGLcall("glGenTextures");
4981         context_bind_texture(context, GL_TEXTURE_2D, backup);
4982         texture_target = GL_TEXTURE_2D;
4983     } else {
4984         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4985          * we are reading from the back buffer, the backup can be used as source texture
4986          */
4987         texture_target = src_surface->texture_target;
4988         context_bind_texture(context, texture_target, src_surface->texture_name);
4989         glEnable(texture_target);
4990         checkGLcall("glEnable(texture_target)");
4991
4992         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4993         src_surface->flags &= ~SFLAG_INTEXTURE;
4994     }
4995
4996     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4997      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4998      */
4999     if(dst_rect.top > dst_rect.bottom) {
5000         UINT tmp = dst_rect.bottom;
5001         dst_rect.bottom = dst_rect.top;
5002         dst_rect.top = tmp;
5003         upsidedown = TRUE;
5004     }
5005
5006     if (src_offscreen)
5007     {
5008         TRACE("Reading from an offscreen target\n");
5009         upsidedown = !upsidedown;
5010         glReadBuffer(device->offscreenBuffer);
5011     }
5012     else
5013     {
5014         glReadBuffer(surface_get_gl_buffer(src_surface));
5015     }
5016
5017     /* TODO: Only back up the part that will be overwritten */
5018     glCopyTexSubImage2D(texture_target, 0,
5019                         0, 0 /* read offsets */,
5020                         0, 0,
5021                         fbwidth,
5022                         fbheight);
5023
5024     checkGLcall("glCopyTexSubImage2D");
5025
5026     /* No issue with overriding these - the sampler is dirty due to blit usage */
5027     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5028             wined3d_gl_mag_filter(magLookup, Filter));
5029     checkGLcall("glTexParameteri");
5030     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5031             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
5032     checkGLcall("glTexParameteri");
5033
5034     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5035         src_swapchain = src_surface->container.u.swapchain;
5036     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5037     {
5038         src = backup ? backup : src_surface->texture_name;
5039     }
5040     else
5041     {
5042         glReadBuffer(GL_FRONT);
5043         checkGLcall("glReadBuffer(GL_FRONT)");
5044
5045         glGenTextures(1, &src);
5046         checkGLcall("glGenTextures(1, &src)");
5047         context_bind_texture(context, GL_TEXTURE_2D, src);
5048
5049         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5050          * out for power of 2 sizes
5051          */
5052         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5053                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5054         checkGLcall("glTexImage2D");
5055         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5056                             0, 0 /* read offsets */,
5057                             0, 0,
5058                             fbwidth,
5059                             fbheight);
5060
5061         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5062         checkGLcall("glTexParameteri");
5063         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5064         checkGLcall("glTexParameteri");
5065
5066         glReadBuffer(GL_BACK);
5067         checkGLcall("glReadBuffer(GL_BACK)");
5068
5069         if(texture_target != GL_TEXTURE_2D) {
5070             glDisable(texture_target);
5071             glEnable(GL_TEXTURE_2D);
5072             texture_target = GL_TEXTURE_2D;
5073         }
5074     }
5075     checkGLcall("glEnd and previous");
5076
5077     left = src_rect->left;
5078     right = src_rect->right;
5079
5080     if (!upsidedown)
5081     {
5082         top = src_surface->resource.height - src_rect->top;
5083         bottom = src_surface->resource.height - src_rect->bottom;
5084     }
5085     else
5086     {
5087         top = src_surface->resource.height - src_rect->bottom;
5088         bottom = src_surface->resource.height - src_rect->top;
5089     }
5090
5091     if (src_surface->flags & SFLAG_NORMCOORD)
5092     {
5093         left /= src_surface->pow2Width;
5094         right /= src_surface->pow2Width;
5095         top /= src_surface->pow2Height;
5096         bottom /= src_surface->pow2Height;
5097     }
5098
5099     /* draw the source texture stretched and upside down. The correct surface is bound already */
5100     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5101     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5102
5103     context_set_draw_buffer(context, drawBuffer);
5104     glReadBuffer(drawBuffer);
5105
5106     glBegin(GL_QUADS);
5107         /* bottom left */
5108         glTexCoord2f(left, bottom);
5109         glVertex2i(0, 0);
5110
5111         /* top left */
5112         glTexCoord2f(left, top);
5113         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5114
5115         /* top right */
5116         glTexCoord2f(right, top);
5117         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5118
5119         /* bottom right */
5120         glTexCoord2f(right, bottom);
5121         glVertex2i(dst_rect.right - dst_rect.left, 0);
5122     glEnd();
5123     checkGLcall("glEnd and previous");
5124
5125     if (texture_target != dst_surface->texture_target)
5126     {
5127         glDisable(texture_target);
5128         glEnable(dst_surface->texture_target);
5129         texture_target = dst_surface->texture_target;
5130     }
5131
5132     /* Now read the stretched and upside down image into the destination texture */
5133     context_bind_texture(context, texture_target, dst_surface->texture_name);
5134     glCopyTexSubImage2D(texture_target,
5135                         0,
5136                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5137                         0, 0, /* We blitted the image to the origin */
5138                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5139     checkGLcall("glCopyTexSubImage2D");
5140
5141     if(drawBuffer == GL_BACK) {
5142         /* Write the back buffer backup back */
5143         if(backup) {
5144             if(texture_target != GL_TEXTURE_2D) {
5145                 glDisable(texture_target);
5146                 glEnable(GL_TEXTURE_2D);
5147                 texture_target = GL_TEXTURE_2D;
5148             }
5149             context_bind_texture(context, GL_TEXTURE_2D, backup);
5150         }
5151         else
5152         {
5153             if (texture_target != src_surface->texture_target)
5154             {
5155                 glDisable(texture_target);
5156                 glEnable(src_surface->texture_target);
5157                 texture_target = src_surface->texture_target;
5158             }
5159             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5160         }
5161
5162         glBegin(GL_QUADS);
5163             /* top left */
5164             glTexCoord2f(0.0f, 0.0f);
5165             glVertex2i(0, fbheight);
5166
5167             /* bottom left */
5168             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5169             glVertex2i(0, 0);
5170
5171             /* bottom right */
5172             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5173                     (float)fbheight / (float)src_surface->pow2Height);
5174             glVertex2i(fbwidth, 0);
5175
5176             /* top right */
5177             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5178             glVertex2i(fbwidth, fbheight);
5179         glEnd();
5180     }
5181     glDisable(texture_target);
5182     checkGLcall("glDisable(texture_target)");
5183
5184     /* Cleanup */
5185     if (src != src_surface->texture_name && src != backup)
5186     {
5187         glDeleteTextures(1, &src);
5188         checkGLcall("glDeleteTextures(1, &src)");
5189     }
5190     if(backup) {
5191         glDeleteTextures(1, &backup);
5192         checkGLcall("glDeleteTextures(1, &backup)");
5193     }
5194
5195     LEAVE_GL();
5196
5197     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5198
5199     context_release(context);
5200
5201     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5202      * path is never entered
5203      */
5204     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5205 }
5206
5207 /* Front buffer coordinates are always full screen coordinates, but our GL
5208  * drawable is limited to the window's client area. The sysmem and texture
5209  * copies do have the full screen size. Note that GL has a bottom-left
5210  * origin, while D3D has a top-left origin. */
5211 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5212 {
5213     UINT drawable_height;
5214
5215     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5216             && surface == surface->container.u.swapchain->front_buffer)
5217     {
5218         POINT offset = {0, 0};
5219         RECT windowsize;
5220
5221         ScreenToClient(window, &offset);
5222         OffsetRect(rect, offset.x, offset.y);
5223
5224         GetClientRect(window, &windowsize);
5225         drawable_height = windowsize.bottom - windowsize.top;
5226     }
5227     else
5228     {
5229         drawable_height = surface->resource.height;
5230     }
5231
5232     rect->top = drawable_height - rect->top;
5233     rect->bottom = drawable_height - rect->bottom;
5234 }
5235
5236 static void surface_blt_to_drawable(struct wined3d_device *device,
5237         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5238         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5239         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5240 {
5241     struct wined3d_context *context;
5242     RECT src_rect, dst_rect;
5243
5244     src_rect = *src_rect_in;
5245     dst_rect = *dst_rect_in;
5246
5247     /* Make sure the surface is up-to-date. This should probably use
5248      * surface_load_location() and worry about the destination surface too,
5249      * unless we're overwriting it completely. */
5250     surface_internal_preload(src_surface, SRGB_RGB);
5251
5252     /* Activate the destination context, set it up for blitting */
5253     context = context_acquire(device, dst_surface);
5254     context_apply_blit_state(context, device);
5255
5256     if (!surface_is_offscreen(dst_surface))
5257         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5258
5259     device->blitter->set_shader(device->blit_priv, context, src_surface);
5260
5261     ENTER_GL();
5262
5263     if (color_key)
5264     {
5265         glEnable(GL_ALPHA_TEST);
5266         checkGLcall("glEnable(GL_ALPHA_TEST)");
5267
5268         /* When the primary render target uses P8, the alpha component
5269          * contains the palette index. Which means that the colorkey is one of
5270          * the palette entries. In other cases pixels that should be masked
5271          * away have alpha set to 0. */
5272         if (primary_render_target_is_p8(device))
5273             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5274         else
5275             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5276         checkGLcall("glAlphaFunc");
5277     }
5278     else
5279     {
5280         glDisable(GL_ALPHA_TEST);
5281         checkGLcall("glDisable(GL_ALPHA_TEST)");
5282     }
5283
5284     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5285
5286     if (color_key)
5287     {
5288         glDisable(GL_ALPHA_TEST);
5289         checkGLcall("glDisable(GL_ALPHA_TEST)");
5290     }
5291
5292     LEAVE_GL();
5293
5294     /* Leave the opengl state valid for blitting */
5295     device->blitter->unset_shader(context->gl_info);
5296
5297     if (wined3d_settings.strict_draw_ordering
5298             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5299             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5300         wglFlush(); /* Flush to ensure ordering across contexts. */
5301
5302     context_release(context);
5303 }
5304
5305 /* Do not call while under the GL lock. */
5306 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5307 {
5308     struct wined3d_device *device = s->resource.device;
5309     const struct blit_shader *blitter;
5310
5311     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5312             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5313     if (!blitter)
5314     {
5315         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5316         return WINED3DERR_INVALIDCALL;
5317     }
5318
5319     return blitter->color_fill(device, s, rect, color);
5320 }
5321
5322 /* Do not call while under the GL lock. */
5323 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5324         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5325         WINED3DTEXTUREFILTERTYPE Filter)
5326 {
5327     struct wined3d_device *device = dst_surface->resource.device;
5328     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5329     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5330
5331     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5332             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5333             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5334
5335     /* Get the swapchain. One of the surfaces has to be a primary surface */
5336     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5337     {
5338         WARN("Destination is in sysmem, rejecting gl blt\n");
5339         return WINED3DERR_INVALIDCALL;
5340     }
5341
5342     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5343         dstSwapchain = dst_surface->container.u.swapchain;
5344
5345     if (src_surface)
5346     {
5347         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5348         {
5349             WARN("Src is in sysmem, rejecting gl blt\n");
5350             return WINED3DERR_INVALIDCALL;
5351         }
5352
5353         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5354             srcSwapchain = src_surface->container.u.swapchain;
5355     }
5356
5357     /* Early sort out of cases where no render target is used */
5358     if (!dstSwapchain && !srcSwapchain
5359             && src_surface != device->fb.render_targets[0]
5360             && dst_surface != device->fb.render_targets[0])
5361     {
5362         TRACE("No surface is render target, not using hardware blit.\n");
5363         return WINED3DERR_INVALIDCALL;
5364     }
5365
5366     /* No destination color keying supported */
5367     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5368     {
5369         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5370         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5371         return WINED3DERR_INVALIDCALL;
5372     }
5373
5374     if (dstSwapchain && dstSwapchain == srcSwapchain)
5375     {
5376         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5377         return WINED3DERR_INVALIDCALL;
5378     }
5379
5380     if (dstSwapchain && srcSwapchain)
5381     {
5382         FIXME("Implement hardware blit between two different swapchains\n");
5383         return WINED3DERR_INVALIDCALL;
5384     }
5385
5386     if (dstSwapchain)
5387     {
5388         /* Handled with regular texture -> swapchain blit */
5389         if (src_surface == device->fb.render_targets[0])
5390             TRACE("Blit from active render target to a swapchain\n");
5391     }
5392     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5393     {
5394         FIXME("Implement blit from a swapchain to the active render target\n");
5395         return WINED3DERR_INVALIDCALL;
5396     }
5397
5398     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5399     {
5400         /* Blit from render target to texture */
5401         BOOL stretchx;
5402
5403         /* P8 read back is not implemented */
5404         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5405                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5406         {
5407             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5408             return WINED3DERR_INVALIDCALL;
5409         }
5410
5411         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5412         {
5413             TRACE("Color keying not supported by frame buffer to texture blit\n");
5414             return WINED3DERR_INVALIDCALL;
5415             /* Destination color key is checked above */
5416         }
5417
5418         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5419             stretchx = TRUE;
5420         else
5421             stretchx = FALSE;
5422
5423         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5424          * flip the image nor scale it.
5425          *
5426          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5427          * -> If the app wants a image width an unscaled width, copy it line per line
5428          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5429          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5430          *    back buffer. This is slower than reading line per line, thus not used for flipping
5431          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5432          *    pixel by pixel. */
5433         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5434                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5435         {
5436             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5437             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5438         } else {
5439             TRACE("Using hardware stretching to flip / stretch the texture\n");
5440             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5441         }
5442
5443         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5444         {
5445             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5446             dst_surface->resource.allocatedMemory = NULL;
5447             dst_surface->resource.heapMemory = NULL;
5448         }
5449         else
5450         {
5451             dst_surface->flags &= ~SFLAG_INSYSMEM;
5452         }
5453
5454         return WINED3D_OK;
5455     }
5456     else if (src_surface)
5457     {
5458         /* Blit from offscreen surface to render target */
5459         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5460         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5461
5462         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5463
5464         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5465                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5466                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5467         {
5468             FIXME("Unsupported blit operation falling back to software\n");
5469             return WINED3DERR_INVALIDCALL;
5470         }
5471
5472         /* Color keying: Check if we have to do a color keyed blt,
5473          * and if not check if a color key is activated.
5474          *
5475          * Just modify the color keying parameters in the surface and restore them afterwards
5476          * The surface keeps track of the color key last used to load the opengl surface.
5477          * PreLoad will catch the change to the flags and color key and reload if necessary.
5478          */
5479         if (flags & WINEDDBLT_KEYSRC)
5480         {
5481             /* Use color key from surface */
5482         }
5483         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5484         {
5485             /* Use color key from DDBltFx */
5486             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5487             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5488         }
5489         else
5490         {
5491             /* Do not use color key */
5492             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5493         }
5494
5495         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5496                 src_surface, src_rect, dst_surface, dst_rect);
5497
5498         /* Restore the color key parameters */
5499         src_surface->CKeyFlags = oldCKeyFlags;
5500         src_surface->SrcBltCKey = oldBltCKey;
5501
5502         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5503
5504         return WINED3D_OK;
5505     }
5506
5507     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5508     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5509     return WINED3DERR_INVALIDCALL;
5510 }
5511
5512 /* GL locking is done by the caller */
5513 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5514         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5515 {
5516     struct wined3d_device *device = surface->resource.device;
5517     const struct wined3d_gl_info *gl_info = context->gl_info;
5518     GLint compare_mode = GL_NONE;
5519     struct blt_info info;
5520     GLint old_binding = 0;
5521     RECT rect;
5522
5523     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5524
5525     glDisable(GL_CULL_FACE);
5526     glDisable(GL_BLEND);
5527     glDisable(GL_ALPHA_TEST);
5528     glDisable(GL_SCISSOR_TEST);
5529     glDisable(GL_STENCIL_TEST);
5530     glEnable(GL_DEPTH_TEST);
5531     glDepthFunc(GL_ALWAYS);
5532     glDepthMask(GL_TRUE);
5533     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5534     glViewport(x, y, w, h);
5535
5536     SetRect(&rect, 0, h, w, 0);
5537     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5538     context_active_texture(context, context->gl_info, 0);
5539     glGetIntegerv(info.binding, &old_binding);
5540     glBindTexture(info.bind_target, texture);
5541     if (gl_info->supported[ARB_SHADOW])
5542     {
5543         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5544         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5545     }
5546
5547     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5548             gl_info, info.tex_type, &surface->ds_current_size);
5549
5550     glBegin(GL_TRIANGLE_STRIP);
5551     glTexCoord3fv(info.coords[0]);
5552     glVertex2f(-1.0f, -1.0f);
5553     glTexCoord3fv(info.coords[1]);
5554     glVertex2f(1.0f, -1.0f);
5555     glTexCoord3fv(info.coords[2]);
5556     glVertex2f(-1.0f, 1.0f);
5557     glTexCoord3fv(info.coords[3]);
5558     glVertex2f(1.0f, 1.0f);
5559     glEnd();
5560
5561     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5562     glBindTexture(info.bind_target, old_binding);
5563
5564     glPopAttrib();
5565
5566     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5567 }
5568
5569 void surface_modify_ds_location(struct wined3d_surface *surface,
5570         DWORD location, UINT w, UINT h)
5571 {
5572     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5573
5574     if (location & ~SFLAG_DS_LOCATIONS)
5575         FIXME("Invalid location (%#x) specified.\n", location);
5576
5577     surface->ds_current_size.cx = w;
5578     surface->ds_current_size.cy = h;
5579     surface->flags &= ~SFLAG_DS_LOCATIONS;
5580     surface->flags |= location;
5581 }
5582
/* Bring the depth data of "surface" up to date in "location", which is
 * expected to be SFLAG_DS_OFFSCREEN (copy the onscreen depth buffer into the
 * surface's texture) or SFLAG_DS_ONSCREEN (blit the surface's texture to the
 * onscreen depth buffer). Both directions go through surface_depth_blt().
 *
 * Only implemented for ORM_FBO; in any other offscreen rendering mode the
 * function returns without doing anything. On the "no up to date location"
 * path and on every path that reaches the end of the function, "location" is
 * added to surface->flags and ds_current_size is set to the full surface
 * size.
 *
 * Context activation is done by the caller. */
void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
{
    struct wined3d_device *device = surface->resource.device;
    GLsizei w, h;

    TRACE("surface %p, new location %#x.\n", surface, location);

    /* TODO: Make this work for modes other than FBO */
    if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;

    /* Pick the size of the region to copy. If the requested location is not
     * flagged yet, use the currently recorded size and reset the record to
     * zero; otherwise use the full surface size. */
    if (!(surface->flags & location))
    {
        w = surface->ds_current_size.cx;
        h = surface->ds_current_size.cy;
        surface->ds_current_size.cx = 0;
        surface->ds_current_size.cy = 0;
    }
    else
    {
        w = surface->resource.width;
        h = surface->resource.height;
    }

    /* This can only be true when the else branch above was taken, since the
     * other branch has just zeroed ds_current_size. */
    if (surface->ds_current_size.cx == surface->resource.width
            && surface->ds_current_size.cy == surface->resource.height)
    {
        TRACE("Location (%#x) is already up to date.\n", location);
        return;
    }

    if (surface->current_renderbuffer)
    {
        FIXME("Not supported with fixed up depth stencil.\n");
        return;
    }

    if (!(surface->flags & SFLAG_DS_LOCATIONS))
    {
        /* This mostly happens when a depth / stencil is used without being
         * cleared first. In principle we could upload from sysmem, or
         * explicitly clear before first usage. For the moment there don't
         * appear to be a lot of applications depending on this, so a FIXME
         * should do. */
        FIXME("No up to date depth stencil location.\n");
        surface->flags |= location;
        surface->ds_current_size.cx = surface->resource.width;
        surface->ds_current_size.cy = surface->resource.height;
        return;
    }

    if (location == SFLAG_DS_OFFSCREEN)
    {
        GLint old_binding = 0;
        GLenum bind_target;

        /* The render target is allowed to be smaller than the depth/stencil
         * buffer, so the onscreen depth/stencil buffer is potentially smaller
         * than the offscreen surface. Don't overwrite the offscreen surface
         * with undefined data. */
        w = min(w, context->swapchain->presentParms.BackBufferWidth);
        h = min(h, context->swapchain->presentParms.BackBufferHeight);

        TRACE("Copying onscreen depth buffer to depth texture.\n");

        ENTER_GL();

        /* The intermediate texture is created on first use and kept on the
         * device. */
        if (!device->depth_blt_texture)
        {
            glGenTextures(1, &device->depth_blt_texture);
        }

        /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
         * directly on the FBO texture. That's because we need to flip. */
        context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
                context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
        /* Remember the texture bound to the target we are about to use, so it
         * can be rebound once the copy is done. */
        if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
        {
            glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
            bind_target = GL_TEXTURE_RECTANGLE_ARB;
        }
        else
        {
            glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
            bind_target = GL_TEXTURE_2D;
        }
        glBindTexture(bind_target, device->depth_blt_texture);
        /* We use GL_DEPTH_COMPONENT instead of the surface's specific
         * internal format, because the internal format might include stencil
         * data. In principle we should copy stencil data as well, but unless
         * the driver supports stencil export it's hard to do, and doesn't
         * seem to be needed in practice. If the hardware doesn't support
         * writing stencil data, the glCopyTexImage2D() call might trigger
         * software fallbacks. */
        glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
        glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
        glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
        glBindTexture(bind_target, old_binding);

        /* Attach the surface as depth/stencil only, with no colour draw or
         * read buffer, as the destination of the blit. */
        context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
                NULL, surface, SFLAG_INTEXTURE);
        context_set_draw_buffer(context, GL_NONE);
        glReadBuffer(GL_NONE);

        /* Do the actual blit */
        surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
        checkGLcall("depth_blt");

        /* The framebuffer binding was changed behind the state tracker's
         * back. */
        context_invalidate_state(context, STATE_FRAMEBUFFER);

        LEAVE_GL();

        if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
    }
    else if (location == SFLAG_DS_ONSCREEN)
    {
        TRACE("Copying depth texture to onscreen depth buffer.\n");

        ENTER_GL();

        context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
                context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
        /* The viewport's y origin is offset by pow2Height - h. */
        surface_depth_blt(surface, context, surface->texture_name,
                0, surface->pow2Height - h, w, h, surface->texture_target);
        checkGLcall("depth_blt");

        context_invalidate_state(context, STATE_FRAMEBUFFER);

        LEAVE_GL();

        if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
    }
    else
    {
        /* NOTE(review): execution continues below, so the invalid location
         * bits are still ORed into surface->flags. Confirm this is intended. */
        ERR("Invalid location (%#x) specified.\n", location);
    }

    surface->flags |= location;
    surface->ds_current_size.cx = surface->resource.width;
    surface->ds_current_size.cy = surface->resource.height;
}
5728
5729 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5730 {
5731     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5732     struct wined3d_surface *overlay;
5733
5734     TRACE("surface %p, location %s, persistent %#x.\n",
5735             surface, debug_surflocation(location), persistent);
5736
5737     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5738             && (location & SFLAG_INDRAWABLE))
5739         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5740
5741     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5742             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5743         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5744
5745     if (persistent)
5746     {
5747         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5748                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5749         {
5750             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5751             {
5752                 TRACE("Passing to container.\n");
5753                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5754             }
5755         }
5756         surface->flags &= ~SFLAG_LOCATIONS;
5757         surface->flags |= location;
5758
5759         /* Redraw emulated overlays, if any */
5760         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5761         {
5762             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5763             {
5764                 overlay->surface_ops->surface_draw_overlay(overlay);
5765             }
5766         }
5767     }
5768     else
5769     {
5770         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5771         {
5772             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5773             {
5774                 TRACE("Passing to container\n");
5775                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5776             }
5777         }
5778         surface->flags &= ~location;
5779     }
5780
5781     if (!(surface->flags & SFLAG_LOCATIONS))
5782     {
5783         ERR("Surface %p does not have any up to date location.\n", surface);
5784     }
5785 }
5786
5787 static DWORD resource_access_from_location(DWORD location)
5788 {
5789     switch (location)
5790     {
5791         case SFLAG_INSYSMEM:
5792             return WINED3D_RESOURCE_ACCESS_CPU;
5793
5794         case SFLAG_INDRAWABLE:
5795         case SFLAG_INSRGBTEX:
5796         case SFLAG_INTEXTURE:
5797         case SFLAG_INRB_MULTISAMPLE:
5798         case SFLAG_INRB_RESOLVED:
5799             return WINED3D_RESOURCE_ACCESS_GPU;
5800
5801         default:
5802             FIXME("Unhandled location %#x.\n", location);
5803             return 0;
5804     }
5805 }
5806
5807 static void surface_load_sysmem(struct wined3d_surface *surface,
5808         const struct wined3d_gl_info *gl_info, const RECT *rect)
5809 {
5810     surface_prepare_system_memory(surface);
5811
5812     /* Download the surface to system memory. */
5813     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5814     {
5815         struct wined3d_device *device = surface->resource.device;
5816         struct wined3d_context *context;
5817
5818         /* TODO: Use already acquired context when possible. */
5819         context = context_acquire(device, NULL);
5820
5821         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5822         surface_download_data(surface, gl_info);
5823
5824         context_release(context);
5825
5826         return;
5827     }
5828
5829     /* Note: It might be faster to download into a texture first. */
5830     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5831             wined3d_surface_get_pitch(surface));
5832 }
5833
/* Bring the drawable (SFLAG_INDRAWABLE) location of a surface up to date.
 *
 * If the RGB texture is current it is blitted to the drawable; otherwise the
 * system memory copy is drawn with flush_to_framebuffer_drawpixels(), going
 * through a temporary converted copy if d3dfmt_get_conv() asks for a
 * conversion.
 *
 * Returns WINED3D_OK on success, WINED3DERR_INVALIDCALL for an offscreen
 * surface in ORM_FBO mode, and E_OUTOFMEMORY if the conversion buffer can't
 * be allocated. */
static HRESULT surface_load_drawable(struct wined3d_surface *surface,
        const struct wined3d_gl_info *gl_info, const RECT *rect)
{
    struct wined3d_device *device = surface->resource.device;
    struct wined3d_format format;
    CONVERT_TYPES convert;
    UINT byte_count;
    BYTE *mem;

    if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
    {
        ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
        return WINED3DERR_INVALIDCALL;
    }

    /* With RTL_READTEX the texture is loaded first, so that the blit path
     * below is taken. The result of the load isn't checked. */
    if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
        surface_load_location(surface, SFLAG_INTEXTURE, NULL);

    if (surface->flags & SFLAG_INTEXTURE)
    {
        RECT r;

        surface_get_rect(surface, rect, &r);
        /* Source and destination are the same surface and rectangle. */
        surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);

        return WINED3D_OK;
    }

    if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
    {
        /* This needs colorspace conversion from sRGB to RGB. We take the slow
         * path through sysmem. */
        surface_load_location(surface, SFLAG_INSYSMEM, rect);
    }

    d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);

    /* Don't use PBOs for converted surfaces. During PBO conversion we look at
     * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
     * called. */
    if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
    {
        struct wined3d_context *context;

        TRACE("Removing the pbo attached to surface %p.\n", surface);

        /* TODO: Use already acquired context when possible. */
        context = context_acquire(device, NULL);

        surface_remove_pbo(surface, gl_info);

        context_release(context);
    }

    if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
    {
        UINT height = surface->resource.height;
        UINT width = surface->resource.width;
        UINT src_pitch, dst_pitch;

        byte_count = format.conv_byte_count;
        src_pitch = wined3d_surface_get_pitch(surface);

        /* Stick to the alignment for the converted surface too, makes it
         * easier to load the surface. */
        dst_pitch = width * byte_count;
        dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);

        /* Temporary buffer for the converted data, freed at the end of the
         * function. */
        if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
        {
            ERR("Out of memory (%u).\n", dst_pitch * height);
            return E_OUTOFMEMORY;
        }

        d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
                src_pitch, width, height, dst_pitch, convert, surface);

        surface->flags |= SFLAG_CONVERTED;
    }
    else
    {
        /* No conversion: draw straight from the surface's own memory. */
        surface->flags &= ~SFLAG_CONVERTED;
        mem = surface->resource.allocatedMemory;
        byte_count = format.byte_count;
    }

    flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);

    /* Don't delete PBO memory. */
    if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
        HeapFree(GetProcessHeap(), 0, mem);

    return WINED3D_OK;
}
5928
/* Bring the RGB (srgb == FALSE) or sRGB (srgb == TRUE) GL texture of a
 * surface up to date.
 *
 * Three paths are tried in order:
 *  - without ORM_FBO, an offscreen surface whose drawable is current is read
 *    back from the framebuffer into the texture;
 *  - if one of the texture locations is current and the format is FBO
 *    attachable both as RGB and sRGB, the data is blitted between the two
 *    textures with an FBO blit;
 *  - otherwise the data is uploaded from system memory, loading the system
 *    memory copy first if needed and converting it if the format or color
 *    keying requires it.
 *
 * Returns WINED3D_OK on success and E_OUTOFMEMORY if a conversion buffer
 * can't be allocated. */
static HRESULT surface_load_texture(struct wined3d_surface *surface,
        const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
{
    const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
    RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
    struct wined3d_device *device = surface->resource.device;
    struct wined3d_context *context;
    UINT width, src_pitch, dst_pitch;
    struct wined3d_bo_address data;
    struct wined3d_format format;
    POINT dst_point = {0, 0};
    CONVERT_TYPES convert;
    BYTE *mem;

    if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
            && surface_is_offscreen(surface)
            && (surface->flags & SFLAG_INDRAWABLE))
    {
        read_from_framebuffer_texture(surface, srgb);

        return WINED3D_OK;
    }

    if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
            && (surface->resource.format->flags & attach_flags) == attach_flags
            && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
                NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
                NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
    {
        /* Blit the whole surface from the other texture location. */
        if (srgb)
            surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
                    &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
        else
            surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
                    &src_rect, surface, SFLAG_INTEXTURE, &src_rect);

        return WINED3D_OK;
    }

    /* Upload from system memory */

    d3dfmt_get_conv(surface, TRUE /* We need color keying */,
            TRUE /* We will use textures */, &format, &convert);

    /* If only the other texture location is current, go through sysmem. */
    if (srgb)
    {
        if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
        {
            /* Performance warning... */
            FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
            surface_load_location(surface, SFLAG_INSYSMEM, rect);
        }
    }
    else
    {
        if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
        {
            /* Performance warning... */
            FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
            surface_load_location(surface, SFLAG_INSYSMEM, rect);
        }
    }

    if (!(surface->flags & SFLAG_INSYSMEM))
    {
        WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
        /* Lets hope we get it from somewhere... */
        surface_load_location(surface, SFLAG_INSYSMEM, rect);
    }

    /* TODO: Use already acquired context when possible. */
    context = context_acquire(device, NULL);

    surface_prepare_texture(surface, context, srgb);
    surface_bind_and_dirtify(surface, context, srgb);

    /* Record the color key the texture is being loaded with. */
    if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
    {
        surface->flags |= SFLAG_GLCKEY;
        surface->glCKey = surface->SrcBltCKey;
    }
    else surface->flags &= ~SFLAG_GLCKEY;

    width = surface->resource.width;
    src_pitch = wined3d_surface_get_pitch(surface);

    /* Don't use PBOs for converted surfaces. During PBO conversion we look at
     * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
     * called. */
    if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
    {
        TRACE("Removing the pbo attached to surface %p.\n", surface);
        surface_remove_pbo(surface, gl_info);
    }

    if (format.convert)
    {
        /* This code is entered for texture formats which need a fixup. */
        UINT height = surface->resource.height;

        /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
        dst_pitch = width * format.conv_byte_count;
        dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);

        if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
        {
            ERR("Out of memory (%u).\n", dst_pitch * height);
            context_release(context);
            return E_OUTOFMEMORY;
        }
        format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
    }
    else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
    {
        /* This code is only entered for color keying fixups */
        UINT height = surface->resource.height;

        /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
        dst_pitch = width * format.conv_byte_count;
        dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);

        if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
        {
            ERR("Out of memory (%u).\n", dst_pitch * height);
            context_release(context);
            return E_OUTOFMEMORY;
        }
        d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
                width, height, dst_pitch, convert, surface);
    }
    else
    {
        /* No conversion: upload straight from the surface's own memory. */
        mem = surface->resource.allocatedMemory;
    }

    /* Upload from the PBO if the surface still has one, with "mem" as the
     * address. */
    data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
    data.addr = mem;
    surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);

    context_release(context);

    /* Don't delete PBO memory. */
    if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
        HeapFree(GetProcessHeap(), 0, mem);

    return WINED3D_OK;
}
6076
6077 static void surface_multisample_resolve(struct wined3d_surface *surface)
6078 {
6079     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6080
6081     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6082         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6083
6084     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6085             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6086 }
6087
6088 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6089 {
6090     struct wined3d_device *device = surface->resource.device;
6091     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6092     HRESULT hr;
6093
6094     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6095
6096     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6097     {
6098         if (location == SFLAG_INTEXTURE)
6099         {
6100             struct wined3d_context *context = context_acquire(device, NULL);
6101             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6102             context_release(context);
6103             return WINED3D_OK;
6104         }
6105         else
6106         {
6107             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6108             return WINED3DERR_INVALIDCALL;
6109         }
6110     }
6111
6112     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6113         location = SFLAG_INTEXTURE;
6114
6115     if (surface->flags & location)
6116     {
6117         TRACE("Location already up to date.\n");
6118         return WINED3D_OK;
6119     }
6120
6121     if (WARN_ON(d3d_surface))
6122     {
6123         DWORD required_access = resource_access_from_location(location);
6124         if ((surface->resource.access_flags & required_access) != required_access)
6125             WARN("Operation requires %#x access, but surface only has %#x.\n",
6126                     required_access, surface->resource.access_flags);
6127     }
6128
6129     if (!(surface->flags & SFLAG_LOCATIONS))
6130     {
6131         ERR("Surface %p does not have any up to date location.\n", surface);
6132         surface->flags |= SFLAG_LOST;
6133         return WINED3DERR_DEVICELOST;
6134     }
6135
6136     switch (location)
6137     {
6138         case SFLAG_INSYSMEM:
6139             surface_load_sysmem(surface, gl_info, rect);
6140             break;
6141
6142         case SFLAG_INDRAWABLE:
6143             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6144                 return hr;
6145             break;
6146
6147         case SFLAG_INRB_RESOLVED:
6148             surface_multisample_resolve(surface);
6149             break;
6150
6151         case SFLAG_INTEXTURE:
6152         case SFLAG_INSRGBTEX:
6153             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6154                 return hr;
6155             break;
6156
6157         default:
6158             ERR("Don't know how to handle location %#x.\n", location);
6159             break;
6160     }
6161
6162     if (!rect)
6163     {
6164         surface->flags |= location;
6165
6166         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6167             surface_evict_sysmem(surface);
6168     }
6169
6170     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6171             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6172     {
6173         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6174     }
6175
6176     return WINED3D_OK;
6177 }
6178
6179 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6180 {
6181     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6182
6183     /* Not on a swapchain - must be offscreen */
6184     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6185
6186     /* The front buffer is always onscreen */
6187     if (surface == swapchain->front_buffer) return FALSE;
6188
6189     /* If the swapchain is rendered to an FBO, the backbuffer is
6190      * offscreen, otherwise onscreen */
6191     return swapchain->render_to_fbo;
6192 }
6193
6194 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Nothing to release for the fixed function blitter.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6197
6198 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6199 /* Context activation is done by the caller. */
6200 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6201 {
6202     BYTE table[256][4];
6203     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6204
6205     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6206
6207     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6208     ENTER_GL();
6209     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6210     LEAVE_GL();
6211 }
6212
6213 /* Context activation is done by the caller. */
6214 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6215 {
6216     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6217
6218     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6219      * else the surface is converted in software at upload time in LoadLocation.
6220      */
6221     if(fixup == COMPLEX_FIXUP_P8 && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6222         ffp_blit_p8_upload_palette(surface, context->gl_info);
6223
6224     ENTER_GL();
6225     glEnable(surface->texture_target);
6226     checkGLcall("glEnable(surface->texture_target)");
6227     LEAVE_GL();
6228     return WINED3D_OK;
6229 }
6230
6231 /* Context activation is done by the caller. */
6232 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6233 {
6234     ENTER_GL();
6235     glDisable(GL_TEXTURE_2D);
6236     checkGLcall("glDisable(GL_TEXTURE_2D)");
6237     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6238     {
6239         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6240         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6241     }
6242     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6243     {
6244         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6245         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6246     }
6247     LEAVE_GL();
6248 }
6249
6250 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6251         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6252         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6253 {
6254     enum complex_fixup src_fixup;
6255
6256     switch (blit_op)
6257     {
6258         case WINED3D_BLIT_OP_COLOR_BLIT:
6259             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6260                 return FALSE;
6261
6262             src_fixup = get_complex_fixup(src_format->color_fixup);
6263             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6264             {
6265                 TRACE("Checking support for fixup:\n");
6266                 dump_color_fixup_desc(src_format->color_fixup);
6267             }
6268
6269             if (!is_identity_fixup(dst_format->color_fixup))
6270             {
6271                 TRACE("Destination fixups are not supported\n");
6272                 return FALSE;
6273             }
6274
6275             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6276             {
6277                 TRACE("P8 fixup supported\n");
6278                 return TRUE;
6279             }
6280
6281             /* We only support identity conversions. */
6282             if (is_identity_fixup(src_format->color_fixup))
6283             {
6284                 TRACE("[OK]\n");
6285                 return TRUE;
6286             }
6287
6288             TRACE("[FAILED]\n");
6289             return FALSE;
6290
6291         case WINED3D_BLIT_OP_COLOR_FILL:
6292             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6293                 return FALSE;
6294
6295             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6296             {
6297                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6298                     return FALSE;
6299             }
6300             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6301             {
6302                 TRACE("Color fill not supported\n");
6303                 return FALSE;
6304             }
6305
6306             /* FIXME: We should reject color fills on formats with fixups,
6307              * but this would break P8 color fills for example. */
6308
6309             return TRUE;
6310
6311         case WINED3D_BLIT_OP_DEPTH_FILL:
6312             return TRUE;
6313
6314         default:
6315             TRACE("Unsupported blit_op=%d\n", blit_op);
6316             return FALSE;
6317     }
6318 }
6319
6320 /* Do not call while under the GL lock. */
6321 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6322         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6323 {
6324     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6325     struct wined3d_fb_state fb = {&dst_surface, NULL};
6326
6327     return device_clear_render_targets(device, 1, &fb,
6328             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6329 }
6330
6331 /* Do not call while under the GL lock. */
6332 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6333         struct wined3d_surface *surface, const RECT *rect, float depth)
6334 {
6335     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6336     struct wined3d_fb_state fb = {NULL, surface};
6337
6338     return device_clear_render_targets(device, 0, &fb,
6339             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6340 }
6341
6342 const struct blit_shader ffp_blit =  {
6343     ffp_blit_alloc,
6344     ffp_blit_free,
6345     ffp_blit_set,
6346     ffp_blit_unset,
6347     ffp_blit_supported,
6348     ffp_blit_color_fill,
6349     ffp_blit_depth_fill,
6350 };
6351
6352 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6353 {
6354     return WINED3D_OK;
6355 }
6356
/* Counterpart of cpu_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Intentionally empty. */
}
6361
6362 /* Context activation is done by the caller. */
6363 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6364 {
6365     return WINED3D_OK;
6366 }
6367
/* Counterpart of cpu_blit_set(); there is nothing to undo.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* Intentionally empty. */
}
6372
6373 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6374         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6375         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6376 {
6377     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6378     {
6379         return TRUE;
6380     }
6381
6382     return FALSE;
6383 }
6384
6385 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6386         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6387         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6388 {
6389     UINT row_block_count;
6390     const BYTE *src_row;
6391     BYTE *dst_row;
6392     UINT x, y;
6393
6394     src_row = src_data;
6395     dst_row = dst_data;
6396
6397     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6398
6399     if (!flags)
6400     {
6401         for (y = 0; y < update_h; y += format->block_height)
6402         {
6403             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6404             src_row += src_pitch;
6405             dst_row += dst_pitch;
6406         }
6407
6408         return WINED3D_OK;
6409     }
6410
6411     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6412     {
6413         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6414
6415         switch (format->id)
6416         {
6417             case WINED3DFMT_DXT1:
6418                 for (y = 0; y < update_h; y += format->block_height)
6419                 {
6420                     struct block
6421                     {
6422                         WORD color[2];
6423                         BYTE control_row[4];
6424                     };
6425
6426                     const struct block *s = (const struct block *)src_row;
6427                     struct block *d = (struct block *)dst_row;
6428
6429                     for (x = 0; x < row_block_count; ++x)
6430                     {
6431                         d[x].color[0] = s[x].color[0];
6432                         d[x].color[1] = s[x].color[1];
6433                         d[x].control_row[0] = s[x].control_row[3];
6434                         d[x].control_row[1] = s[x].control_row[2];
6435                         d[x].control_row[2] = s[x].control_row[1];
6436                         d[x].control_row[3] = s[x].control_row[0];
6437                     }
6438                     src_row -= src_pitch;
6439                     dst_row += dst_pitch;
6440                 }
6441                 return WINED3D_OK;
6442
6443             case WINED3DFMT_DXT3:
6444                 for (y = 0; y < update_h; y += format->block_height)
6445                 {
6446                     struct block
6447                     {
6448                         WORD alpha_row[4];
6449                         WORD color[2];
6450                         BYTE control_row[4];
6451                     };
6452
6453                     const struct block *s = (const struct block *)src_row;
6454                     struct block *d = (struct block *)dst_row;
6455
6456                     for (x = 0; x < row_block_count; ++x)
6457                     {
6458                         d[x].alpha_row[0] = s[x].alpha_row[3];
6459                         d[x].alpha_row[1] = s[x].alpha_row[2];
6460                         d[x].alpha_row[2] = s[x].alpha_row[1];
6461                         d[x].alpha_row[3] = s[x].alpha_row[0];
6462                         d[x].color[0] = s[x].color[0];
6463                         d[x].color[1] = s[x].color[1];
6464                         d[x].control_row[0] = s[x].control_row[3];
6465                         d[x].control_row[1] = s[x].control_row[2];
6466                         d[x].control_row[2] = s[x].control_row[1];
6467                         d[x].control_row[3] = s[x].control_row[0];
6468                     }
6469                     src_row -= src_pitch;
6470                     dst_row += dst_pitch;
6471                 }
6472                 return WINED3D_OK;
6473
6474             default:
6475                 FIXME("Compressed flip not implemented for format %s.\n",
6476                         debug_d3dformat(format->id));
6477                 return E_NOTIMPL;
6478         }
6479     }
6480
6481     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6482             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6483
6484     return E_NOTIMPL;
6485 }
6486
6487 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6488         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6489         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6490 {
6491     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6492     const struct wined3d_format *src_format, *dst_format;
6493     struct wined3d_surface *orig_src = src_surface;
6494     WINED3DLOCKED_RECT dlock, slock;
6495     HRESULT hr = WINED3D_OK;
6496     const BYTE *sbuf;
6497     RECT xdst,xsrc;
6498     BYTE *dbuf;
6499     int x, y;
6500
6501     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6502             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6503             flags, fx, debug_d3dtexturefiltertype(filter));
6504
6505     xsrc = *src_rect;
6506
6507     if (!src_surface)
6508     {
6509         RECT full_rect;
6510
6511         full_rect.left = 0;
6512         full_rect.top = 0;
6513         full_rect.right = dst_surface->resource.width;
6514         full_rect.bottom = dst_surface->resource.height;
6515         IntersectRect(&xdst, &full_rect, dst_rect);
6516     }
6517     else
6518     {
6519         BOOL clip_horiz, clip_vert;
6520
6521         xdst = *dst_rect;
6522         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6523         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6524
6525         if (clip_vert || clip_horiz)
6526         {
6527             /* Now check if this is a special case or not... */
6528             if ((flags & WINEDDBLT_DDFX)
6529                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6530                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6531             {
6532                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6533                 return WINED3D_OK;
6534             }
6535
6536             if (clip_horiz)
6537             {
6538                 if (xdst.left < 0)
6539                 {
6540                     xsrc.left -= xdst.left;
6541                     xdst.left = 0;
6542                 }
6543                 if (xdst.right > dst_surface->resource.width)
6544                 {
6545                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6546                     xdst.right = (int)dst_surface->resource.width;
6547                 }
6548             }
6549
6550             if (clip_vert)
6551             {
6552                 if (xdst.top < 0)
6553                 {
6554                     xsrc.top -= xdst.top;
6555                     xdst.top = 0;
6556                 }
6557                 if (xdst.bottom > dst_surface->resource.height)
6558                 {
6559                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6560                     xdst.bottom = (int)dst_surface->resource.height;
6561                 }
6562             }
6563
6564             /* And check if after clipping something is still to be done... */
6565             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6566                     || (xdst.left >= (int)dst_surface->resource.width)
6567                     || (xdst.top >= (int)dst_surface->resource.height)
6568                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6569                     || (xsrc.left >= (int)src_surface->resource.width)
6570                     || (xsrc.top >= (int)src_surface->resource.height))
6571             {
6572                 TRACE("Nothing to be done after clipping.\n");
6573                 return WINED3D_OK;
6574             }
6575         }
6576     }
6577
6578     if (src_surface == dst_surface)
6579     {
6580         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6581         slock = dlock;
6582         src_format = dst_surface->resource.format;
6583         dst_format = src_format;
6584     }
6585     else
6586     {
6587         dst_format = dst_surface->resource.format;
6588         if (src_surface)
6589         {
6590             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6591             {
6592                 src_surface = surface_convert_format(src_surface, dst_format->id);
6593                 if (!src_surface)
6594                 {
6595                     /* The conv function writes a FIXME */
6596                     WARN("Cannot convert source surface format to dest format.\n");
6597                     goto release;
6598                 }
6599             }
6600             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6601             src_format = src_surface->resource.format;
6602         }
6603         else
6604         {
6605             src_format = dst_format;
6606         }
6607         if (dst_rect)
6608             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6609         else
6610             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6611     }
6612
6613     bpp = dst_surface->resource.format->byte_count;
6614     srcheight = xsrc.bottom - xsrc.top;
6615     srcwidth = xsrc.right - xsrc.left;
6616     dstheight = xdst.bottom - xdst.top;
6617     dstwidth = xdst.right - xdst.left;
6618     width = (xdst.right - xdst.left) * bpp;
6619
6620     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6621     {
6622         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6623
6624         if (src_surface == dst_surface)
6625         {
6626             FIXME("Only plain blits supported on compressed surfaces.\n");
6627             hr = E_NOTIMPL;
6628             goto release;
6629         }
6630
6631         if (srcheight != dstheight || srcwidth != dstwidth)
6632         {
6633             WARN("Stretching not supported on compressed surfaces.\n");
6634             hr = WINED3DERR_INVALIDCALL;
6635             goto release;
6636         }
6637
6638         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6639         {
6640             WARN("Rectangle not block-aligned.\n");
6641             hr = WINED3DERR_INVALIDCALL;
6642             goto release;
6643         }
6644
6645         hr = surface_cpu_blt_compressed(slock.pBits, dlock.pBits,
6646                 slock.Pitch, dlock.Pitch, dstwidth, dstheight,
6647                 src_format, flags, fx);
6648         goto release;
6649     }
6650
6651     if (dst_rect && src_surface != dst_surface)
6652         dbuf = dlock.pBits;
6653     else
6654         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6655
6656     /* First, all the 'source-less' blits */
6657     if (flags & WINEDDBLT_COLORFILL)
6658     {
6659         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6660         flags &= ~WINEDDBLT_COLORFILL;
6661     }
6662
6663     if (flags & WINEDDBLT_DEPTHFILL)
6664     {
6665         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6666     }
6667     if (flags & WINEDDBLT_ROP)
6668     {
6669         /* Catch some degenerate cases here. */
6670         switch (fx->dwROP)
6671         {
6672             case BLACKNESS:
6673                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6674                 break;
6675             case 0xAA0029: /* No-op */
6676                 break;
6677             case WHITENESS:
6678                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6679                 break;
6680             case SRCCOPY: /* Well, we do that below? */
6681                 break;
6682             default:
6683                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6684                 goto error;
6685         }
6686         flags &= ~WINEDDBLT_ROP;
6687     }
6688     if (flags & WINEDDBLT_DDROPS)
6689     {
6690         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6691     }
6692     /* Now the 'with source' blits. */
6693     if (src_surface)
6694     {
6695         const BYTE *sbase;
6696         int sx, xinc, sy, yinc;
6697
6698         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6699             goto release;
6700
6701         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6702                 && (srcwidth != dstwidth || srcheight != dstheight))
6703         {
6704             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6705             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6706         }
6707
6708         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6709         xinc = (srcwidth << 16) / dstwidth;
6710         yinc = (srcheight << 16) / dstheight;
6711
6712         if (!flags)
6713         {
6714             /* No effects, we can cheat here. */
6715             if (dstwidth == srcwidth)
6716             {
6717                 if (dstheight == srcheight)
6718                 {
6719                     /* No stretching in either direction. This needs to be as
6720                      * fast as possible. */
6721                     sbuf = sbase;
6722
6723                     /* Check for overlapping surfaces. */
6724                     if (src_surface != dst_surface || xdst.top < xsrc.top
6725                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6726                     {
6727                         /* No overlap, or dst above src, so copy from top downwards. */
6728                         for (y = 0; y < dstheight; ++y)
6729                         {
6730                             memcpy(dbuf, sbuf, width);
6731                             sbuf += slock.Pitch;
6732                             dbuf += dlock.Pitch;
6733                         }
6734                     }
6735                     else if (xdst.top > xsrc.top)
6736                     {
6737                         /* Copy from bottom upwards. */
6738                         sbuf += (slock.Pitch*dstheight);
6739                         dbuf += (dlock.Pitch*dstheight);
6740                         for (y = 0; y < dstheight; ++y)
6741                         {
6742                             sbuf -= slock.Pitch;
6743                             dbuf -= dlock.Pitch;
6744                             memcpy(dbuf, sbuf, width);
6745                         }
6746                     }
6747                     else
6748                     {
6749                         /* Src and dst overlapping on the same line, use memmove. */
6750                         for (y = 0; y < dstheight; ++y)
6751                         {
6752                             memmove(dbuf, sbuf, width);
6753                             sbuf += slock.Pitch;
6754                             dbuf += dlock.Pitch;
6755                         }
6756                     }
6757                 }
6758                 else
6759                 {
6760                     /* Stretching in y direction only. */
6761                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6762                     {
6763                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6764                         memcpy(dbuf, sbuf, width);
6765                         dbuf += dlock.Pitch;
6766                     }
6767                 }
6768             }
6769             else
6770             {
6771                 /* Stretching in X direction. */
6772                 int last_sy = -1;
6773                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6774                 {
6775                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6776
6777                     if ((sy >> 16) == (last_sy >> 16))
6778                     {
6779                         /* This source row is the same as last source row -
6780                          * Copy the already stretched row. */
6781                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6782                     }
6783                     else
6784                     {
6785 #define STRETCH_ROW(type) \
6786 do { \
6787     const type *s = (const type *)sbuf; \
6788     type *d = (type *)dbuf; \
6789     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6790         d[x] = s[sx >> 16]; \
6791 } while(0)
6792
6793                         switch(bpp)
6794                         {
6795                             case 1:
6796                                 STRETCH_ROW(BYTE);
6797                                 break;
6798                             case 2:
6799                                 STRETCH_ROW(WORD);
6800                                 break;
6801                             case 4:
6802                                 STRETCH_ROW(DWORD);
6803                                 break;
6804                             case 3:
6805                             {
6806                                 const BYTE *s;
6807                                 BYTE *d = dbuf;
6808                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6809                                 {
6810                                     DWORD pixel;
6811
6812                                     s = sbuf + 3 * (sx >> 16);
6813                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6814                                     d[0] = (pixel      ) & 0xff;
6815                                     d[1] = (pixel >>  8) & 0xff;
6816                                     d[2] = (pixel >> 16) & 0xff;
6817                                     d += 3;
6818                                 }
6819                                 break;
6820                             }
6821                             default:
6822                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6823                                 hr = WINED3DERR_NOTAVAILABLE;
6824                                 goto error;
6825                         }
6826 #undef STRETCH_ROW
6827                     }
6828                     dbuf += dlock.Pitch;
6829                     last_sy = sy;
6830                 }
6831             }
6832         }
6833         else
6834         {
6835             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6836             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6837             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6838             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6839             {
6840                 /* The color keying flags are checked for correctness in ddraw */
6841                 if (flags & WINEDDBLT_KEYSRC)
6842                 {
6843                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6844                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6845                 }
6846                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6847                 {
6848                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6849                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6850                 }
6851
6852                 if (flags & WINEDDBLT_KEYDEST)
6853                 {
6854                     /* Destination color keys are taken from the source surface! */
6855                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6856                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6857                 }
6858                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6859                 {
6860                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6861                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6862                 }
6863
6864                 if (bpp == 1)
6865                 {
6866                     keymask = 0xff;
6867                 }
6868                 else
6869                 {
6870                     keymask = src_format->red_mask
6871                             | src_format->green_mask
6872                             | src_format->blue_mask;
6873                 }
6874                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6875             }
6876
6877             if (flags & WINEDDBLT_DDFX)
6878             {
6879                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6880                 LONG tmpxy;
6881                 dTopLeft     = dbuf;
6882                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6883                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6884                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6885
6886                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6887                 {
6888                     /* I don't think we need to do anything about this flag */
6889                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6890                 }
6891                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6892                 {
6893                     tmp          = dTopRight;
6894                     dTopRight    = dTopLeft;
6895                     dTopLeft     = tmp;
6896                     tmp          = dBottomRight;
6897                     dBottomRight = dBottomLeft;
6898                     dBottomLeft  = tmp;
6899                     dstxinc = dstxinc * -1;
6900                 }
6901                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6902                 {
6903                     tmp          = dTopLeft;
6904                     dTopLeft     = dBottomLeft;
6905                     dBottomLeft  = tmp;
6906                     tmp          = dTopRight;
6907                     dTopRight    = dBottomRight;
6908                     dBottomRight = tmp;
6909                     dstyinc = dstyinc * -1;
6910                 }
6911                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6912                 {
6913                     /* I don't think we need to do anything about this flag */
6914                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6915                 }
6916                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6917                 {
6918                     tmp          = dBottomRight;
6919                     dBottomRight = dTopLeft;
6920                     dTopLeft     = tmp;
6921                     tmp          = dBottomLeft;
6922                     dBottomLeft  = dTopRight;
6923                     dTopRight    = tmp;
6924                     dstxinc = dstxinc * -1;
6925                     dstyinc = dstyinc * -1;
6926                 }
6927                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6928                 {
6929                     tmp          = dTopLeft;
6930                     dTopLeft     = dBottomLeft;
6931                     dBottomLeft  = dBottomRight;
6932                     dBottomRight = dTopRight;
6933                     dTopRight    = tmp;
6934                     tmpxy   = dstxinc;
6935                     dstxinc = dstyinc;
6936                     dstyinc = tmpxy;
6937                     dstxinc = dstxinc * -1;
6938                 }
6939                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6940                 {
6941                     tmp          = dTopLeft;
6942                     dTopLeft     = dTopRight;
6943                     dTopRight    = dBottomRight;
6944                     dBottomRight = dBottomLeft;
6945                     dBottomLeft  = tmp;
6946                     tmpxy   = dstxinc;
6947                     dstxinc = dstyinc;
6948                     dstyinc = tmpxy;
6949                     dstyinc = dstyinc * -1;
6950                 }
6951                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6952                 {
6953                     /* I don't think we need to do anything about this flag */
6954                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6955                 }
6956                 dbuf = dTopLeft;
6957                 flags &= ~(WINEDDBLT_DDFX);
6958             }
6959
6960 #define COPY_COLORKEY_FX(type) \
6961 do { \
6962     const type *s; \
6963     type *d = (type *)dbuf, *dx, tmp; \
6964     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6965     { \
6966         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6967         dx = d; \
6968         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6969         { \
6970             tmp = s[sx >> 16]; \
6971             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6972                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6973             { \
6974                 dx[0] = tmp; \
6975             } \
6976             dx = (type *)(((BYTE *)dx) + dstxinc); \
6977         } \
6978         d = (type *)(((BYTE *)d) + dstyinc); \
6979     } \
6980 } while(0)
6981
6982             switch (bpp)
6983             {
6984                 case 1:
6985                     COPY_COLORKEY_FX(BYTE);
6986                     break;
6987                 case 2:
6988                     COPY_COLORKEY_FX(WORD);
6989                     break;
6990                 case 4:
6991                     COPY_COLORKEY_FX(DWORD);
6992                     break;
6993                 case 3:
6994                 {
6995                     const BYTE *s;
6996                     BYTE *d = dbuf, *dx;
6997                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6998                     {
6999                         sbuf = sbase + (sy >> 16) * slock.Pitch;
7000                         dx = d;
7001                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7002                         {
7003                             DWORD pixel, dpixel = 0;
7004                             s = sbuf + 3 * (sx>>16);
7005                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7006                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7007                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7008                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7009                             {
7010                                 dx[0] = (pixel      ) & 0xff;
7011                                 dx[1] = (pixel >>  8) & 0xff;
7012                                 dx[2] = (pixel >> 16) & 0xff;
7013                             }
7014                             dx += dstxinc;
7015                         }
7016                         d += dstyinc;
7017                     }
7018                     break;
7019                 }
7020                 default:
7021                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7022                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7023                     hr = WINED3DERR_NOTAVAILABLE;
7024                     goto error;
7025 #undef COPY_COLORKEY_FX
7026             }
7027         }
7028     }
7029
7030 error:
7031     if (flags && FIXME_ON(d3d_surface))
7032     {
7033         FIXME("\tUnsupported flags: %#x.\n", flags);
7034     }
7035
7036 release:
7037     wined3d_surface_unmap(dst_surface);
7038     if (src_surface && src_surface != dst_surface)
7039         wined3d_surface_unmap(src_surface);
7040     /* Release the converted surface, if any. */
7041     if (src_surface && src_surface != orig_src)
7042         wined3d_surface_decref(src_surface);
7043
7044     return hr;
7045 }
7046
7047 /* Do not call while under the GL lock. */
7048 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7049         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7050 {
7051     static const RECT src_rect;
7052     WINEDDBLTFX BltFx;
7053
7054     memset(&BltFx, 0, sizeof(BltFx));
7055     BltFx.dwSize = sizeof(BltFx);
7056     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7057     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7058             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7059 }
7060
7061 /* Do not call while under the GL lock. */
7062 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7063         struct wined3d_surface *surface, const RECT *rect, float depth)
7064 {
7065     FIXME("Depth filling not implemented by cpu_blit.\n");
7066     return WINED3DERR_INVALIDCALL;
7067 }
7068
7069 const struct blit_shader cpu_blit =  {
7070     cpu_blit_alloc,
7071     cpu_blit_free,
7072     cpu_blit_set,
7073     cpu_blit_unset,
7074     cpu_blit_supported,
7075     cpu_blit_color_fill,
7076     cpu_blit_depth_fill,
7077 };
7078
7079 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7080         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7081         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7082         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7083 {
7084     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7085     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7086     unsigned int resource_size;
7087     HRESULT hr;
7088
7089     if (multisample_quality > 0)
7090     {
7091         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7092         multisample_quality = 0;
7093     }
7094
7095     /* Quick lockable sanity check.
7096      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7097      * this function is too deep to need to care about things like this.
7098      * Levels need to be checked too, since they all affect what can be done. */
7099     switch (pool)
7100     {
7101         case WINED3DPOOL_SCRATCH:
7102             if (!lockable)
7103             {
7104                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7105                         "which are mutually exclusive, setting lockable to TRUE.\n");
7106                 lockable = TRUE;
7107             }
7108             break;
7109
7110         case WINED3DPOOL_SYSTEMMEM:
7111             if (!lockable)
7112                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7113             break;
7114
7115         case WINED3DPOOL_MANAGED:
7116             if (usage & WINED3DUSAGE_DYNAMIC)
7117                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7118             break;
7119
7120         case WINED3DPOOL_DEFAULT:
7121             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7122                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7123             break;
7124
7125         default:
7126             FIXME("Unknown pool %#x.\n", pool);
7127             break;
7128     };
7129
7130     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7131         FIXME("Trying to create a render target that isn't in the default pool.\n");
7132
7133     /* FIXME: Check that the format is supported by the device. */
7134
7135     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7136     if (!resource_size)
7137         return WINED3DERR_INVALIDCALL;
7138
7139     surface->surface_type = surface_type;
7140
7141     switch (surface_type)
7142     {
7143         case SURFACE_OPENGL:
7144             surface->surface_ops = &surface_ops;
7145             break;
7146
7147         case SURFACE_GDI:
7148             surface->surface_ops = &gdi_surface_ops;
7149             break;
7150
7151         default:
7152             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7153             return WINED3DERR_INVALIDCALL;
7154     }
7155
7156     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7157             multisample_type, multisample_quality, usage, pool, width, height, 1,
7158             resource_size, parent, parent_ops, &surface_resource_ops);
7159     if (FAILED(hr))
7160     {
7161         WARN("Failed to initialize resource, returning %#x.\n", hr);
7162         return hr;
7163     }
7164
7165     /* "Standalone" surface. */
7166     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7167
7168     surface->texture_level = level;
7169     list_init(&surface->overlays);
7170
7171     /* Flags */
7172     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7173     if (discard)
7174         surface->flags |= SFLAG_DISCARD;
7175     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7176         surface->flags |= SFLAG_LOCKABLE;
7177     /* I'm not sure if this qualifies as a hack or as an optimization. It
7178      * seems reasonable to assume that lockable render targets will get
7179      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7180      * creation. However, the other reason we want to do this is that several
7181      * ddraw applications access surface memory while the surface isn't
7182      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7183      * future locks prevents these from crashing. */
7184     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7185         surface->flags |= SFLAG_DYNLOCK;
7186
7187     /* Mark the texture as dirty so that it gets loaded first time around. */
7188     surface_add_dirty_rect(surface, NULL);
7189     list_init(&surface->renderbuffers);
7190
7191     TRACE("surface %p, memory %p, size %u\n",
7192             surface, surface->resource.allocatedMemory, surface->resource.size);
7193
7194     /* Call the private setup routine */
7195     hr = surface->surface_ops->surface_private_setup(surface);
7196     if (FAILED(hr))
7197     {
7198         ERR("Private setup failed, returning %#x\n", hr);
7199         surface->surface_ops->surface_cleanup(surface);
7200         return hr;
7201     }
7202
7203     return hr;
7204 }
7205
7206 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7207         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7208         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7209         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7210 {
7211     struct wined3d_surface *object;
7212     HRESULT hr;
7213
7214     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7215             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7216     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7217             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7218     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7219
7220     if (surface_type == SURFACE_OPENGL && !device->adapter)
7221     {
7222         ERR("OpenGL surfaces are not available without OpenGL.\n");
7223         return WINED3DERR_NOTAVAILABLE;
7224     }
7225
7226     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7227     if (!object)
7228     {
7229         ERR("Failed to allocate surface memory.\n");
7230         return WINED3DERR_OUTOFVIDEOMEMORY;
7231     }
7232
7233     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7234             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7235     if (FAILED(hr))
7236     {
7237         WARN("Failed to initialize surface, returning %#x.\n", hr);
7238         HeapFree(GetProcessHeap(), 0, object);
7239         return hr;
7240     }
7241
7242     TRACE("Created surface %p.\n", object);
7243     *surface = object;
7244
7245     return hr;
7246 }