wined3d: Get rid of the haveHardwareCursor flag.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT surface_cpu_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans);
41 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
42         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
43         WINED3DTEXTUREFILTERTYPE filter);
44
45 static void surface_cleanup(struct wined3d_surface *surface)
46 {
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO) || !list_empty(&surface->renderbuffers))
50     {
51         struct wined3d_renderbuffer_entry *entry, *entry2;
52         const struct wined3d_gl_info *gl_info;
53         struct wined3d_context *context;
54
55         context = context_acquire(surface->resource.device, NULL);
56         gl_info = context->gl_info;
57
58         ENTER_GL();
59
60         if (surface->texture_name)
61         {
62             TRACE("Deleting texture %u.\n", surface->texture_name);
63             glDeleteTextures(1, &surface->texture_name);
64         }
65
66         if (surface->flags & SFLAG_PBO)
67         {
68             TRACE("Deleting PBO %u.\n", surface->pbo);
69             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
70         }
71
72         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
73         {
74             TRACE("Deleting renderbuffer %u.\n", entry->id);
75             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
76             HeapFree(GetProcessHeap(), 0, entry);
77         }
78
79         LEAVE_GL();
80
81         context_release(context);
82     }
83
84     if (surface->flags & SFLAG_DIBSECTION)
85     {
86         /* Release the DC. */
87         SelectObject(surface->hDC, surface->dib.holdbitmap);
88         DeleteDC(surface->hDC);
89         /* Release the DIB section. */
90         DeleteObject(surface->dib.DIBsection);
91         surface->dib.bitmap_data = NULL;
92         surface->resource.allocatedMemory = NULL;
93     }
94
95     if (surface->flags & SFLAG_USERPTR)
96         wined3d_surface_set_mem(surface, NULL);
97     if (surface->overlay_dest)
98         list_remove(&surface->overlay_entry);
99
100     HeapFree(GetProcessHeap(), 0, surface->palette9);
101
102     resource_cleanup(&surface->resource);
103 }
104
105 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
106 {
107     TRACE("surface %p, container %p.\n", surface, container);
108
109     if (!container && type != WINED3D_CONTAINER_NONE)
110         ERR("Setting NULL container of type %#x.\n", type);
111
112     if (type == WINED3D_CONTAINER_SWAPCHAIN)
113     {
114         surface->get_drawable_size = get_drawable_size_swapchain;
115     }
116     else
117     {
118         switch (wined3d_settings.offscreen_rendering_mode)
119         {
120             case ORM_FBO:
121                 surface->get_drawable_size = get_drawable_size_fbo;
122                 break;
123
124             case ORM_BACKBUFFER:
125                 surface->get_drawable_size = get_drawable_size_backbuffer;
126                 break;
127
128             default:
129                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
130                 return;
131         }
132     }
133
134     surface->container.type = type;
135     surface->container.u.base = container;
136 }
137
138 struct blt_info
139 {
140     GLenum binding;
141     GLenum bind_target;
142     enum tex_types tex_type;
143     GLfloat coords[4][3];
144 };
145
/* Floating point rectangle: left, top, right and bottom edges, in that order. */
struct float_rect
{
    float l, t, r, b;
};
153
154 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
155 {
156     f->l = ((r->left * 2.0f) / w) - 1.0f;
157     f->t = ((r->top * 2.0f) / h) - 1.0f;
158     f->r = ((r->right * 2.0f) / w) - 1.0f;
159     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
160 }
161
162 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
163 {
164     GLfloat (*coords)[3] = info->coords;
165     struct float_rect f;
166
167     switch (target)
168     {
169         default:
170             FIXME("Unsupported texture target %#x\n", target);
171             /* Fall back to GL_TEXTURE_2D */
172         case GL_TEXTURE_2D:
173             info->binding = GL_TEXTURE_BINDING_2D;
174             info->bind_target = GL_TEXTURE_2D;
175             info->tex_type = tex_2d;
176             coords[0][0] = (float)rect->left / w;
177             coords[0][1] = (float)rect->top / h;
178             coords[0][2] = 0.0f;
179
180             coords[1][0] = (float)rect->right / w;
181             coords[1][1] = (float)rect->top / h;
182             coords[1][2] = 0.0f;
183
184             coords[2][0] = (float)rect->left / w;
185             coords[2][1] = (float)rect->bottom / h;
186             coords[2][2] = 0.0f;
187
188             coords[3][0] = (float)rect->right / w;
189             coords[3][1] = (float)rect->bottom / h;
190             coords[3][2] = 0.0f;
191             break;
192
193         case GL_TEXTURE_RECTANGLE_ARB:
194             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
195             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
196             info->tex_type = tex_rect;
197             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
198             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
199             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
200             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
201             break;
202
203         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
204             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
205             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
206             info->tex_type = tex_cube;
207             cube_coords_float(rect, w, h, &f);
208
209             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
210             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
211             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
212             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
213             break;
214
215         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
216             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
217             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
218             info->tex_type = tex_cube;
219             cube_coords_float(rect, w, h, &f);
220
221             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
222             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
223             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
224             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
225             break;
226
227         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
228             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
229             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
230             info->tex_type = tex_cube;
231             cube_coords_float(rect, w, h, &f);
232
233             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
234             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
235             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
236             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
237             break;
238
239         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
240             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
241             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
242             info->tex_type = tex_cube;
243             cube_coords_float(rect, w, h, &f);
244
245             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
246             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
247             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
248             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
249             break;
250
251         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
252             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
253             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
254             info->tex_type = tex_cube;
255             cube_coords_float(rect, w, h, &f);
256
257             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
258             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
259             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
260             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
261             break;
262
263         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
264             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
265             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
266             info->tex_type = tex_cube;
267             cube_coords_float(rect, w, h, &f);
268
269             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
270             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
271             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
272             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
273             break;
274     }
275 }
276
277 static inline void surface_get_rect(struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
278 {
279     if (rect_in)
280         *rect_out = *rect_in;
281     else
282     {
283         rect_out->left = 0;
284         rect_out->top = 0;
285         rect_out->right = surface->resource.width;
286         rect_out->bottom = surface->resource.height;
287     }
288 }
289
290 /* GL locking and context activation is done by the caller */
291 void draw_textured_quad(struct wined3d_surface *src_surface, const RECT *src_rect,
292         const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
293 {
294     struct blt_info info;
295
296     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
297
298     glEnable(info.bind_target);
299     checkGLcall("glEnable(bind_target)");
300
301     /* Bind the texture */
302     glBindTexture(info.bind_target, src_surface->texture_name);
303     checkGLcall("glBindTexture");
304
305     /* Filtering for StretchRect */
306     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
307             wined3d_gl_mag_filter(magLookup, Filter));
308     checkGLcall("glTexParameteri");
309     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
310             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
311     checkGLcall("glTexParameteri");
312     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
313     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
314     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
315     checkGLcall("glTexEnvi");
316
317     /* Draw a quad */
318     glBegin(GL_TRIANGLE_STRIP);
319     glTexCoord3fv(info.coords[0]);
320     glVertex2i(dst_rect->left, dst_rect->top);
321
322     glTexCoord3fv(info.coords[1]);
323     glVertex2i(dst_rect->right, dst_rect->top);
324
325     glTexCoord3fv(info.coords[2]);
326     glVertex2i(dst_rect->left, dst_rect->bottom);
327
328     glTexCoord3fv(info.coords[3]);
329     glVertex2i(dst_rect->right, dst_rect->bottom);
330     glEnd();
331
332     /* Unbind the texture */
333     glBindTexture(info.bind_target, 0);
334     checkGLcall("glBindTexture(info->bind_target, 0)");
335
336     /* We changed the filtering settings on the texture. Inform the
337      * container about this to get the filters reset properly next draw. */
338     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
339     {
340         struct wined3d_texture *texture = src_surface->container.u.texture;
341         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
342         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
343         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
344     }
345 }
346
347 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
348 {
349     const struct wined3d_format *format = surface->resource.format;
350     SYSTEM_INFO sysInfo;
351     BITMAPINFO *b_info;
352     int extraline = 0;
353     DWORD *masks;
354     UINT usage;
355     HDC dc;
356
357     TRACE("surface %p.\n", surface);
358
359     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
360     {
361         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
362         return WINED3DERR_INVALIDCALL;
363     }
364
365     switch (format->byte_count)
366     {
367         case 2:
368         case 4:
369             /* Allocate extra space to store the RGB bit masks. */
370             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
371             break;
372
373         case 3:
374             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
375             break;
376
377         default:
378             /* Allocate extra space for a palette. */
379             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
380                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
381             break;
382     }
383
384     if (!b_info)
385         return E_OUTOFMEMORY;
386
387     /* Some applications access the surface in via DWORDs, and do not take
388      * the necessary care at the end of the surface. So we need at least
389      * 4 extra bytes at the end of the surface. Check against the page size,
390      * if the last page used for the surface has at least 4 spare bytes we're
391      * safe, otherwise add an extra line to the DIB section. */
392     GetSystemInfo(&sysInfo);
393     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
394     {
395         extraline = 1;
396         TRACE("Adding an extra line to the DIB section.\n");
397     }
398
399     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
400     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
401     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
402     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
403     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
404             * wined3d_surface_get_pitch(surface);
405     b_info->bmiHeader.biPlanes = 1;
406     b_info->bmiHeader.biBitCount = format->byte_count * 8;
407
408     b_info->bmiHeader.biXPelsPerMeter = 0;
409     b_info->bmiHeader.biYPelsPerMeter = 0;
410     b_info->bmiHeader.biClrUsed = 0;
411     b_info->bmiHeader.biClrImportant = 0;
412
413     /* Get the bit masks */
414     masks = (DWORD *)b_info->bmiColors;
415     switch (surface->resource.format->id)
416     {
417         case WINED3DFMT_B8G8R8_UNORM:
418             usage = DIB_RGB_COLORS;
419             b_info->bmiHeader.biCompression = BI_RGB;
420             break;
421
422         case WINED3DFMT_B5G5R5X1_UNORM:
423         case WINED3DFMT_B5G5R5A1_UNORM:
424         case WINED3DFMT_B4G4R4A4_UNORM:
425         case WINED3DFMT_B4G4R4X4_UNORM:
426         case WINED3DFMT_B2G3R3_UNORM:
427         case WINED3DFMT_B2G3R3A8_UNORM:
428         case WINED3DFMT_R10G10B10A2_UNORM:
429         case WINED3DFMT_R8G8B8A8_UNORM:
430         case WINED3DFMT_R8G8B8X8_UNORM:
431         case WINED3DFMT_B10G10R10A2_UNORM:
432         case WINED3DFMT_B5G6R5_UNORM:
433         case WINED3DFMT_R16G16B16A16_UNORM:
434             usage = 0;
435             b_info->bmiHeader.biCompression = BI_BITFIELDS;
436             masks[0] = format->red_mask;
437             masks[1] = format->green_mask;
438             masks[2] = format->blue_mask;
439             break;
440
441         default:
442             /* Don't know palette */
443             b_info->bmiHeader.biCompression = BI_RGB;
444             usage = 0;
445             break;
446     }
447
448     if (!(dc = GetDC(0)))
449     {
450         HeapFree(GetProcessHeap(), 0, b_info);
451         return HRESULT_FROM_WIN32(GetLastError());
452     }
453
454     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
455             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
456             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
457     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
458     ReleaseDC(0, dc);
459
460     if (!surface->dib.DIBsection)
461     {
462         ERR("Failed to create DIB section.\n");
463         HeapFree(GetProcessHeap(), 0, b_info);
464         return HRESULT_FROM_WIN32(GetLastError());
465     }
466
467     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
468     /* Copy the existing surface to the dib section. */
469     if (surface->resource.allocatedMemory)
470     {
471         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
472                 surface->resource.height * wined3d_surface_get_pitch(surface));
473     }
474     else
475     {
476         /* This is to make maps read the GL texture although memory is allocated. */
477         surface->flags &= ~SFLAG_INSYSMEM;
478     }
479     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
480
481     HeapFree(GetProcessHeap(), 0, b_info);
482
483     /* Now allocate a DC. */
484     surface->hDC = CreateCompatibleDC(0);
485     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
486     TRACE("Using wined3d palette %p.\n", surface->palette);
487     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
488
489     surface->flags |= SFLAG_DIBSECTION;
490
491     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
492     surface->resource.heapMemory = NULL;
493
494     return WINED3D_OK;
495 }
496
497 static void surface_prepare_system_memory(struct wined3d_surface *surface)
498 {
499     struct wined3d_device *device = surface->resource.device;
500     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
501
502     TRACE("surface %p.\n", surface);
503
504     /* Performance optimization: Count how often a surface is locked, if it is
505      * locked regularly do not throw away the system memory copy. This avoids
506      * the need to download the surface from OpenGL all the time. The surface
507      * is still downloaded if the OpenGL texture is changed. */
508     if (!(surface->flags & SFLAG_DYNLOCK))
509     {
510         if (++surface->lockCount > MAXLOCKCOUNT)
511         {
512             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
513             surface->flags |= SFLAG_DYNLOCK;
514         }
515     }
516
517     /* Create a PBO for dynamically locked surfaces but don't do it for
518      * converted or NPOT surfaces. Also don't create a PBO for systemmem
519      * surfaces. */
520     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
521             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
522             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
523     {
524         struct wined3d_context *context;
525         GLenum error;
526
527         context = context_acquire(device, NULL);
528         ENTER_GL();
529
530         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
531         error = glGetError();
532         if (!surface->pbo || error != GL_NO_ERROR)
533             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
534
535         TRACE("Binding PBO %u.\n", surface->pbo);
536
537         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
538         checkGLcall("glBindBufferARB");
539
540         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
541                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
542         checkGLcall("glBufferDataARB");
543
544         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
545         checkGLcall("glBindBufferARB");
546
547         /* We don't need the system memory anymore and we can't even use it for PBOs. */
548         if (!(surface->flags & SFLAG_CLIENT))
549         {
550             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
551             surface->resource.heapMemory = NULL;
552         }
553         surface->resource.allocatedMemory = NULL;
554         surface->flags |= SFLAG_PBO;
555         LEAVE_GL();
556         context_release(context);
557     }
558     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
559     {
560         /* Whatever surface we have, make sure that there is memory allocated
561          * for the downloaded copy, or a PBO to map. */
562         if (!surface->resource.heapMemory)
563             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
564
565         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
566                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
567
568         if (surface->flags & SFLAG_INSYSMEM)
569             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
570     }
571 }
572
573 static void surface_evict_sysmem(struct wined3d_surface *surface)
574 {
575     if (surface->flags & SFLAG_DONOTFREE)
576         return;
577
578     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
579     surface->resource.allocatedMemory = NULL;
580     surface->resource.heapMemory = NULL;
581     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
582 }
583
584 /* Context activation is done by the caller. */
585 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
586         const struct wined3d_gl_info *gl_info, BOOL srgb)
587 {
588     struct wined3d_device *device = surface->resource.device;
589     DWORD active_sampler;
590     GLint active_texture;
591
592     /* We don't need a specific texture unit, but after binding the texture
593      * the current unit is dirty. Read the unit back instead of switching to
594      * 0, this avoids messing around with the state manager's GL states. The
595      * current texture unit should always be a valid one.
596      *
597      * To be more specific, this is tricky because we can implicitly be
598      * called from sampler() in state.c. This means we can't touch anything
599      * other than whatever happens to be the currently active texture, or we
600      * would risk marking already applied sampler states dirty again.
601      *
602      * TODO: Track the current active texture per GL context instead of using
603      * glGet(). */
604
605     ENTER_GL();
606     glGetIntegerv(GL_ACTIVE_TEXTURE, &active_texture);
607     LEAVE_GL();
608     active_sampler = device->rev_tex_unit_map[active_texture - GL_TEXTURE0_ARB];
609
610     if (active_sampler != WINED3D_UNMAPPED_STAGE)
611     {
612         IWineD3DDeviceImpl_MarkStateDirty(device, STATE_SAMPLER(active_sampler));
613     }
614     surface_bind(surface, gl_info, srgb);
615 }
616
617 static void surface_force_reload(struct wined3d_surface *surface)
618 {
619     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
620 }
621
622 static void surface_release_client_storage(struct wined3d_surface *surface)
623 {
624     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
625
626     ENTER_GL();
627     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
628     if (surface->texture_name)
629     {
630         surface_bind_and_dirtify(surface, context->gl_info, FALSE);
631         glTexImage2D(surface->texture_target, surface->texture_level,
632                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
633     }
634     if (surface->texture_name_srgb)
635     {
636         surface_bind_and_dirtify(surface, context->gl_info, TRUE);
637         glTexImage2D(surface->texture_target, surface->texture_level,
638                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
639     }
640     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
641     LEAVE_GL();
642
643     context_release(context);
644
645     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
646     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
647     surface_force_reload(surface);
648 }
649
650 static HRESULT surface_private_setup(struct wined3d_surface *surface)
651 {
652     /* TODO: Check against the maximum texture sizes supported by the video card. */
653     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
654     unsigned int pow2Width, pow2Height;
655
656     TRACE("surface %p.\n", surface);
657
658     surface->texture_name = 0;
659     surface->texture_target = GL_TEXTURE_2D;
660
661     /* Non-power2 support */
662     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
663     {
664         pow2Width = surface->resource.width;
665         pow2Height = surface->resource.height;
666     }
667     else
668     {
669         /* Find the nearest pow2 match */
670         pow2Width = pow2Height = 1;
671         while (pow2Width < surface->resource.width)
672             pow2Width <<= 1;
673         while (pow2Height < surface->resource.height)
674             pow2Height <<= 1;
675     }
676     surface->pow2Width = pow2Width;
677     surface->pow2Height = pow2Height;
678
679     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
680     {
681         /* TODO: Add support for non power two compressed textures. */
682         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
683         {
684             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
685                   surface, surface->resource.width, surface->resource.height);
686             return WINED3DERR_NOTAVAILABLE;
687         }
688     }
689
690     if (pow2Width != surface->resource.width
691             || pow2Height != surface->resource.height)
692     {
693         surface->flags |= SFLAG_NONPOW2;
694     }
695
696     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
697             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
698     {
699         /* One of three options:
700          * 1: Do the same as we do with NPOT and scale the texture, (any
701          *    texture ops would require the texture to be scaled which is
702          *    potentially slow)
703          * 2: Set the texture to the maximum size (bad idea).
704          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
705          * 4: Create the surface, but allow it to be used only for DirectDraw
706          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
707          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
708          *    the render target. */
709         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
710         {
711             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
712             return WINED3DERR_NOTAVAILABLE;
713         }
714
715         /* We should never use this surface in combination with OpenGL! */
716         TRACE("Creating an oversized surface: %ux%u.\n",
717                 surface->pow2Width, surface->pow2Height);
718     }
719     else
720     {
721         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
722          * and EXT_PALETTED_TEXTURE is used in combination with texture
723          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
724          * EXT_PALETTED_TEXTURE doesn't work in combination with
725          * ARB_TEXTURE_RECTANGLE. */
726         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
727                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
728                 && gl_info->supported[EXT_PALETTED_TEXTURE]
729                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
730         {
731             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
732             surface->pow2Width = surface->resource.width;
733             surface->pow2Height = surface->resource.height;
734             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
735         }
736     }
737
738     switch (wined3d_settings.offscreen_rendering_mode)
739     {
740         case ORM_FBO:
741             surface->get_drawable_size = get_drawable_size_fbo;
742             break;
743
744         case ORM_BACKBUFFER:
745             surface->get_drawable_size = get_drawable_size_backbuffer;
746             break;
747
748         default:
749             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
750             return WINED3DERR_INVALIDCALL;
751     }
752
753     surface->flags |= SFLAG_INSYSMEM;
754
755     return WINED3D_OK;
756 }
757
758 static void surface_realize_palette(struct wined3d_surface *surface)
759 {
760     struct wined3d_palette *palette = surface->palette;
761
762     TRACE("surface %p.\n", surface);
763
764     if (!palette) return;
765
766     if (surface->resource.format->id == WINED3DFMT_P8_UINT
767             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
768     {
769         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
770         {
771             /* Make sure the texture is up to date. This call doesn't do
772              * anything if the texture is already up to date. */
773             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
774
775             /* We want to force a palette refresh, so mark the drawable as not being up to date */
776             if (!surface_is_offscreen(surface))
777                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
778         }
779         else
780         {
781             if (!(surface->flags & SFLAG_INSYSMEM))
782             {
783                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
784                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
785             }
786             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
787         }
788     }
789
790     if (surface->flags & SFLAG_DIBSECTION)
791     {
792         RGBQUAD col[256];
793         unsigned int i;
794
795         TRACE("Updating the DC's palette.\n");
796
797         for (i = 0; i < 256; ++i)
798         {
799             col[i].rgbRed   = palette->palents[i].peRed;
800             col[i].rgbGreen = palette->palents[i].peGreen;
801             col[i].rgbBlue  = palette->palents[i].peBlue;
802             col[i].rgbReserved = 0;
803         }
804         SetDIBColorTable(surface->hDC, 0, 256, col);
805     }
806
807     /* Propagate the changes to the drawable when we have a palette. */
808     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
809         surface_load_location(surface, SFLAG_INDRAWABLE, NULL);
810 }
811
812 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
813 {
814     HRESULT hr;
815
816     /* If there's no destination surface there is nothing to do. */
817     if (!surface->overlay_dest)
818         return WINED3D_OK;
819
820     /* Blt calls ModifyLocation on the dest surface, which in turn calls
821      * DrawOverlay to update the overlay. Prevent an endless recursion. */
822     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
823         return WINED3D_OK;
824
825     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
826     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
827             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
828     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
829
830     return hr;
831 }
832
833 static void surface_preload(struct wined3d_surface *surface)
834 {
835     TRACE("surface %p.\n", surface);
836
837     surface_internal_preload(surface, SRGB_ANY);
838 }
839
840 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
841 {
842     struct wined3d_device *device = surface->resource.device;
843     const RECT *pass_rect = rect;
844
845     TRACE("surface %p, rect %s, flags %#x.\n",
846             surface, wine_dbgstr_rect(rect), flags);
847
848     if (flags & WINED3DLOCK_DISCARD)
849     {
850         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
851         surface_prepare_system_memory(surface);
852         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
853     }
854     else
855     {
856         /* surface_load_location() does not check if the rectangle specifies
857          * the full surface. Most callers don't need that, so do it here. */
858         if (rect && !rect->top && !rect->left
859                 && rect->right == surface->resource.width
860                 && rect->bottom == surface->resource.height)
861             pass_rect = NULL;
862
863         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
864                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN) || surface == device->render_targets[0])))
865             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
866     }
867
868     if (surface->flags & SFLAG_PBO)
869     {
870         const struct wined3d_gl_info *gl_info;
871         struct wined3d_context *context;
872
873         context = context_acquire(device, NULL);
874         gl_info = context->gl_info;
875
876         ENTER_GL();
877         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
878         checkGLcall("glBindBufferARB");
879
880         /* This shouldn't happen but could occur if some other function
881          * didn't handle the PBO properly. */
882         if (surface->resource.allocatedMemory)
883             ERR("The surface already has PBO memory allocated.\n");
884
885         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
886         checkGLcall("glMapBufferARB");
887
888         /* Make sure the PBO isn't set anymore in order not to break non-PBO
889          * calls. */
890         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
891         checkGLcall("glBindBufferARB");
892
893         LEAVE_GL();
894         context_release(context);
895     }
896
897     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
898     {
899         if (!rect)
900             surface_add_dirty_rect(surface, NULL);
901         else
902         {
903             WINED3DBOX b;
904
905             b.Left = rect->left;
906             b.Top = rect->top;
907             b.Right = rect->right;
908             b.Bottom = rect->bottom;
909             b.Front = 0;
910             b.Back = 1;
911             surface_add_dirty_rect(surface, &b);
912         }
913     }
914 }
915
916 static void surface_unmap(struct wined3d_surface *surface)
917 {
918     struct wined3d_device *device = surface->resource.device;
919     BOOL fullsurface;
920
921     TRACE("surface %p.\n", surface);
922
923     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
924
925     if (surface->flags & SFLAG_PBO)
926     {
927         const struct wined3d_gl_info *gl_info;
928         struct wined3d_context *context;
929
930         TRACE("Freeing PBO memory.\n");
931
932         context = context_acquire(device, NULL);
933         gl_info = context->gl_info;
934
935         ENTER_GL();
936         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
937         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
938         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
939         checkGLcall("glUnmapBufferARB");
940         LEAVE_GL();
941         context_release(context);
942
943         surface->resource.allocatedMemory = NULL;
944     }
945
946     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
947
948     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
949     {
950         TRACE("Not dirtified, nothing to do.\n");
951         goto done;
952     }
953
954     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
955             || (device->render_targets && surface == device->render_targets[0]))
956     {
957         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
958         {
959             static BOOL warned = FALSE;
960             if (!warned)
961             {
962                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
963                 warned = TRUE;
964             }
965             goto done;
966         }
967
968         if (!surface->dirtyRect.left && !surface->dirtyRect.top
969                 && surface->dirtyRect.right == surface->resource.width
970                 && surface->dirtyRect.bottom == surface->resource.height)
971         {
972             fullsurface = TRUE;
973         }
974         else
975         {
976             /* TODO: Proper partial rectangle tracking. */
977             fullsurface = FALSE;
978             surface->flags |= SFLAG_INSYSMEM;
979         }
980
981         surface_load_location(surface, SFLAG_INDRAWABLE, fullsurface ? NULL : &surface->dirtyRect);
982
983         /* Partial rectangle tracking is not commonly implemented, it is only
984          * done for render targets. INSYSMEM was set before to tell
985          * surface_load_location() where to read the rectangle from.
986          * Indrawable is set because all modifications from the partial
987          * sysmem copy are written back to the drawable, thus the surface is
988          * merged again in the drawable. The sysmem copy is not fully up to
989          * date because only a subrectangle was read in Map(). */
990         if (!fullsurface)
991         {
992             surface_modify_location(surface, SFLAG_INDRAWABLE, TRUE);
993             surface_evict_sysmem(surface);
994         }
995
996         surface->dirtyRect.left = surface->resource.width;
997         surface->dirtyRect.top = surface->resource.height;
998         surface->dirtyRect.right = 0;
999         surface->dirtyRect.bottom = 0;
1000     }
1001     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1002     {
1003         FIXME("Depth / stencil buffer locking is not implemented.\n");
1004     }
1005
1006 done:
1007     /* Overlays have to be redrawn manually after changes with the GL implementation */
1008     if (surface->overlay_dest)
1009         surface->surface_ops->surface_draw_overlay(surface);
1010 }
1011
1012 static HRESULT surface_getdc(struct wined3d_surface *surface)
1013 {
1014     WINED3DLOCKED_RECT lock;
1015     HRESULT hr;
1016
1017     TRACE("surface %p.\n", surface);
1018
1019     /* Create a DIB section if there isn't a dc yet. */
1020     if (!surface->hDC)
1021     {
1022         if (surface->flags & SFLAG_CLIENT)
1023         {
1024             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1025             surface_release_client_storage(surface);
1026         }
1027         hr = surface_create_dib_section(surface);
1028         if (FAILED(hr))
1029             return WINED3DERR_INVALIDCALL;
1030
1031         /* Use the DIB section from now on if we are not using a PBO. */
1032         if (!(surface->flags & SFLAG_PBO))
1033             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1034     }
1035
1036     /* Map the surface. */
1037     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1038     if (FAILED(hr))
1039         ERR("Map failed, hr %#x.\n", hr);
1040
1041     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1042      * activates the allocatedMemory. */
1043     if (surface->flags & SFLAG_PBO)
1044         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->dib.bitmap_size);
1045
1046     return hr;
1047 }
1048
1049 static HRESULT surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1050 {
1051     TRACE("surface %p, override %p.\n", surface, override);
1052
1053     /* Flipping is only supported on render targets and overlays. */
1054     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
1055     {
1056         WARN("Tried to flip a non-render target, non-overlay surface.\n");
1057         return WINEDDERR_NOTFLIPPABLE;
1058     }
1059
1060     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1061     {
1062         flip_surface(surface, override);
1063
1064         /* Update the overlay if it is visible */
1065         if (surface->overlay_dest)
1066             return surface->surface_ops->surface_draw_overlay(surface);
1067         else
1068             return WINED3D_OK;
1069     }
1070
1071     return WINED3D_OK;
1072 }
1073
1074 static BOOL surface_is_full_rect(struct wined3d_surface *surface, const RECT *r)
1075 {
1076     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1077         return FALSE;
1078     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1079         return FALSE;
1080     return TRUE;
1081 }
1082
1083 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1084         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1085 {
1086     const struct wined3d_gl_info *gl_info;
1087     struct wined3d_context *context;
1088     DWORD src_mask, dst_mask;
1089     GLbitfield gl_mask;
1090
1091     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1092             device, src_surface, wine_dbgstr_rect(src_rect),
1093             dst_surface, wine_dbgstr_rect(dst_rect));
1094
1095     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1096     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1097
1098     if (src_mask != dst_mask)
1099     {
1100         ERR("Incompatible formats %s and %s.\n",
1101                 debug_d3dformat(src_surface->resource.format->id),
1102                 debug_d3dformat(dst_surface->resource.format->id));
1103         return;
1104     }
1105
1106     if (!src_mask)
1107     {
1108         ERR("Not a depth / stencil format: %s.\n",
1109                 debug_d3dformat(src_surface->resource.format->id));
1110         return;
1111     }
1112
1113     gl_mask = 0;
1114     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1115         gl_mask |= GL_DEPTH_BUFFER_BIT;
1116     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1117         gl_mask |= GL_STENCIL_BUFFER_BIT;
1118
1119     /* Make sure the locations are up-to-date. Loading the destination
1120      * surface isn't required if the entire surface is overwritten. */
1121     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1122     if (!surface_is_full_rect(dst_surface, dst_rect))
1123         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1124
1125     context = context_acquire(device, NULL);
1126     if (!context->valid)
1127     {
1128         context_release(context);
1129         WARN("Invalid context, skipping blit.\n");
1130         return;
1131     }
1132
1133     gl_info = context->gl_info;
1134
1135     ENTER_GL();
1136
1137     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1138     glReadBuffer(GL_NONE);
1139     checkGLcall("glReadBuffer()");
1140     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1141
1142     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1143     context_set_draw_buffer(context, GL_NONE);
1144     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1145
1146     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1147     {
1148         glDepthMask(GL_TRUE);
1149         IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1150     }
1151     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1152     {
1153         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1154         {
1155             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1156             IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1157         }
1158         glStencilMask(~0U);
1159         IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1160     }
1161
1162     glDisable(GL_SCISSOR_TEST);
1163     IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1164
1165     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1166             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1167     checkGLcall("glBlitFramebuffer()");
1168
1169     LEAVE_GL();
1170
1171     if (wined3d_settings.strict_draw_ordering)
1172         wglFlush(); /* Flush to ensure ordering across contexts. */
1173
1174     context_release(context);
1175 }
1176
1177 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1178         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1179         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1180 {
1181     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1182         return FALSE;
1183
1184     /* Source and/or destination need to be on the GL side */
1185     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1186         return FALSE;
1187
1188     switch (blit_op)
1189     {
1190         case WINED3D_BLIT_OP_COLOR_BLIT:
1191             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1192                 return FALSE;
1193             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1194                 return FALSE;
1195             break;
1196
1197         case WINED3D_BLIT_OP_DEPTH_BLIT:
1198             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1199                 return FALSE;
1200             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1201                 return FALSE;
1202             break;
1203
1204         default:
1205             return FALSE;
1206     }
1207
1208     if (!(src_format->id == dst_format->id
1209             || (is_identity_fixup(src_format->color_fixup)
1210             && is_identity_fixup(dst_format->color_fixup))))
1211         return FALSE;
1212
1213     return TRUE;
1214 }
1215
1216 static BOOL surface_convert_depth_to_float(struct wined3d_surface *surface, DWORD depth, float *float_depth)
1217 {
1218     const struct wined3d_format *format = surface->resource.format;
1219
1220     switch (format->id)
1221     {
1222         case WINED3DFMT_S1_UINT_D15_UNORM:
1223             *float_depth = depth / (float)0x00007fff;
1224             break;
1225
1226         case WINED3DFMT_D16_UNORM:
1227             *float_depth = depth / (float)0x0000ffff;
1228             break;
1229
1230         case WINED3DFMT_D24_UNORM_S8_UINT:
1231         case WINED3DFMT_X8D24_UNORM:
1232             *float_depth = depth / (float)0x00ffffff;
1233             break;
1234
1235         case WINED3DFMT_D32_UNORM:
1236             *float_depth = depth / (float)0xffffffff;
1237             break;
1238
1239         default:
1240             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1241             return FALSE;
1242     }
1243
1244     return TRUE;
1245 }
1246
1247 /* Do not call while under the GL lock. */
1248 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1249 {
1250     const struct wined3d_resource *resource = &surface->resource;
1251     struct wined3d_device *device = resource->device;
1252     const struct blit_shader *blitter;
1253
1254     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1255             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1256     if (!blitter)
1257     {
1258         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1259         return WINED3DERR_INVALIDCALL;
1260     }
1261
1262     return blitter->depth_fill(device, surface, rect, depth);
1263 }
1264
1265 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1266         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1267 {
1268     struct wined3d_device *device = src_surface->resource.device;
1269
1270     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1271             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1272             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1273         return WINED3DERR_INVALIDCALL;
1274
1275     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1276
1277     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1278             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1279     surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
1280
1281     return WINED3D_OK;
1282 }
1283
1284 /* Do not call while under the GL lock. */
1285 static HRESULT surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1286         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1287         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1288 {
1289     struct wined3d_device *device = dst_surface->resource.device;
1290     DWORD src_ds_flags, dst_ds_flags;
1291
1292     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1293             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1294             flags, fx, debug_d3dtexturefiltertype(filter));
1295     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1296
1297     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1298     {
1299         WARN(" Surface is busy, returning DDERR_SURFACEBUSY\n");
1300         return WINEDDERR_SURFACEBUSY;
1301     }
1302
1303     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1304     if (src_surface)
1305         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1306     else
1307         src_ds_flags = 0;
1308
1309     if (src_ds_flags || dst_ds_flags)
1310     {
1311         if (flags & WINEDDBLT_DEPTHFILL)
1312         {
1313             float depth;
1314             RECT rect;
1315
1316             TRACE("Depth fill.\n");
1317
1318             surface_get_rect(dst_surface, dst_rect_in, &rect);
1319
1320             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1321                 return WINED3DERR_INVALIDCALL;
1322
1323             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &rect, depth)))
1324                 return WINED3D_OK;
1325         }
1326         else
1327         {
1328             RECT src_rect, dst_rect;
1329
1330             /* Accessing depth / stencil surfaces is supposed to fail while in
1331              * a scene, except for fills, which seem to work. */
1332             if (device->inScene)
1333             {
1334                 WARN("Rejecting depth / stencil access while in scene.\n");
1335                 return WINED3DERR_INVALIDCALL;
1336             }
1337
1338             if (src_ds_flags != dst_ds_flags)
1339             {
1340                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1341                 return WINED3DERR_INVALIDCALL;
1342             }
1343
1344             if (src_rect_in && (src_rect_in->top || src_rect_in->left
1345                     || src_rect_in->bottom != src_surface->resource.height
1346                     || src_rect_in->right != src_surface->resource.width))
1347             {
1348                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1349                         wine_dbgstr_rect(src_rect_in));
1350                 return WINED3DERR_INVALIDCALL;
1351             }
1352
1353             if (dst_rect_in && (dst_rect_in->top || dst_rect_in->left
1354                     || dst_rect_in->bottom != dst_surface->resource.height
1355                     || dst_rect_in->right != dst_surface->resource.width))
1356             {
1357                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1358                         wine_dbgstr_rect(src_rect_in));
1359                 return WINED3DERR_INVALIDCALL;
1360             }
1361
1362             if (src_surface->resource.height != dst_surface->resource.height
1363                     || src_surface->resource.width != dst_surface->resource.width)
1364             {
1365                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1366                 return WINED3DERR_INVALIDCALL;
1367             }
1368
1369             surface_get_rect(src_surface, src_rect_in, &src_rect);
1370             surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1371
1372             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1373                 return WINED3D_OK;
1374         }
1375     }
1376
1377     /* Special cases for render targets. */
1378     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1379             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1380     {
1381         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, dst_rect_in,
1382                 src_surface, src_rect_in, flags, fx, filter)))
1383             return WINED3D_OK;
1384     }
1385
1386     /* For the rest call the X11 surface implementation. For render targets
1387      * this should be implemented OpenGL accelerated in BltOverride, other
1388      * blits are rather rare. */
1389     return surface_cpu_blt(dst_surface, dst_rect_in, src_surface, src_rect_in, flags, fx, filter);
1390 }
1391
1392 /* Do not call while under the GL lock. */
1393 static HRESULT surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1394         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1395 {
1396     struct wined3d_device *device = dst_surface->resource.device;
1397
1398     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
1399             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1400
1401     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface->flags & SFLAG_LOCKED))
1402     {
1403         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1404         return WINEDDERR_SURFACEBUSY;
1405     }
1406
1407     if (device->inScene && (dst_surface == device->depth_stencil || src_surface == device->depth_stencil))
1408     {
1409         WARN("Attempt to access the depth / stencil surface while in a scene.\n");
1410         return WINED3DERR_INVALIDCALL;
1411     }
1412
1413     /* Special cases for RenderTargets */
1414     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1415             || (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
1416     {
1417
1418         RECT src_rect, dst_rect;
1419         DWORD flags = 0;
1420
1421         surface_get_rect(src_surface, src_rect_in, &src_rect);
1422
1423         dst_rect.left = dst_x;
1424         dst_rect.top = dst_y;
1425         dst_rect.right = dst_x + src_rect.right - src_rect.left;
1426         dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1427
1428         /* Convert BltFast flags into Blt ones because BltOverride is called
1429          * from Blt as well. */
1430         if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1431             flags |= WINEDDBLT_KEYSRC;
1432         if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1433             flags |= WINEDDBLT_KEYDEST;
1434         if (trans & WINEDDBLTFAST_WAIT)
1435             flags |= WINEDDBLT_WAIT;
1436         if (trans & WINEDDBLTFAST_DONOTWAIT)
1437             flags |= WINEDDBLT_DONOTWAIT;
1438
1439         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface,
1440                 &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT)))
1441             return WINED3D_OK;
1442     }
1443
1444     return surface_cpu_bltfast(dst_surface, dst_x, dst_y, src_surface, src_rect_in, trans);
1445 }
1446
1447 static HRESULT surface_set_mem(struct wined3d_surface *surface, void *mem)
1448 {
1449     TRACE("surface %p, mem %p.\n", surface, mem);
1450
1451     if (mem && mem != surface->resource.allocatedMemory)
1452     {
1453         void *release = NULL;
1454
1455         /* Do I have to copy the old surface content? */
1456         if (surface->flags & SFLAG_DIBSECTION)
1457         {
1458             SelectObject(surface->hDC, surface->dib.holdbitmap);
1459             DeleteDC(surface->hDC);
1460             /* Release the DIB section. */
1461             DeleteObject(surface->dib.DIBsection);
1462             surface->dib.bitmap_data = NULL;
1463             surface->resource.allocatedMemory = NULL;
1464             surface->hDC = NULL;
1465             surface->flags &= ~SFLAG_DIBSECTION;
1466         }
1467         else if (!(surface->flags & SFLAG_USERPTR))
1468         {
1469             release = surface->resource.heapMemory;
1470             surface->resource.heapMemory = NULL;
1471         }
1472         surface->resource.allocatedMemory = mem;
1473         surface->flags |= SFLAG_USERPTR;
1474
1475         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
1476         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1477
1478         /* For client textures OpenGL has to be notified. */
1479         if (surface->flags & SFLAG_CLIENT)
1480             surface_release_client_storage(surface);
1481
1482         /* Now free the old memory if any. */
1483         HeapFree(GetProcessHeap(), 0, release);
1484     }
1485     else if (surface->flags & SFLAG_USERPTR)
1486     {
1487         /* Map and GetDC will re-create the dib section and allocated memory. */
1488         surface->resource.allocatedMemory = NULL;
1489         /* HeapMemory should be NULL already. */
1490         if (surface->resource.heapMemory)
1491             ERR("User pointer surface has heap memory allocated.\n");
1492         surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
1493
1494         if (surface->flags & SFLAG_CLIENT)
1495             surface_release_client_storage(surface);
1496
1497         surface_prepare_system_memory(surface);
1498         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1499     }
1500
1501     return WINED3D_OK;
1502 }
1503
1504 /* Context activation is done by the caller. */
1505 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1506 {
1507     if (!surface->resource.heapMemory)
1508     {
1509         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1510         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1511                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1512     }
1513
1514     ENTER_GL();
1515     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1516     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1517     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1518             surface->resource.size, surface->resource.allocatedMemory));
1519     checkGLcall("glGetBufferSubDataARB");
1520     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1521     checkGLcall("glDeleteBuffersARB");
1522     LEAVE_GL();
1523
1524     surface->pbo = 0;
1525     surface->flags &= ~SFLAG_PBO;
1526 }
1527
1528 /* Do not call while under the GL lock. */
1529 static void surface_unload(struct wined3d_resource *resource)
1530 {
1531     struct wined3d_surface *surface = surface_from_resource(resource);
1532     struct wined3d_renderbuffer_entry *entry, *entry2;
1533     struct wined3d_device *device = resource->device;
1534     const struct wined3d_gl_info *gl_info;
1535     struct wined3d_context *context;
1536
1537     TRACE("surface %p.\n", surface);
1538
1539     if (resource->pool == WINED3DPOOL_DEFAULT)
1540     {
1541         /* Default pool resources are supposed to be destroyed before Reset is called.
1542          * Implicit resources stay however. So this means we have an implicit render target
1543          * or depth stencil. The content may be destroyed, but we still have to tear down
1544          * opengl resources, so we cannot leave early.
1545          *
1546          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1547          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1548          * or the depth stencil into an FBO the texture or render buffer will be removed
1549          * and all flags get lost
1550          */
1551         surface_init_sysmem(surface);
1552     }
1553     else
1554     {
1555         /* Load the surface into system memory */
1556         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1557         surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
1558     }
1559     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1560     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1561     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1562
1563     context = context_acquire(device, NULL);
1564     gl_info = context->gl_info;
1565
1566     /* Destroy PBOs, but load them into real sysmem before */
1567     if (surface->flags & SFLAG_PBO)
1568         surface_remove_pbo(surface, gl_info);
1569
1570     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1571      * all application-created targets the application has to release the surface
1572      * before calling _Reset
1573      */
1574     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1575     {
1576         ENTER_GL();
1577         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1578         LEAVE_GL();
1579         list_remove(&entry->entry);
1580         HeapFree(GetProcessHeap(), 0, entry);
1581     }
1582     list_init(&surface->renderbuffers);
1583     surface->current_renderbuffer = NULL;
1584
1585     /* If we're in a texture, the texture name belongs to the texture.
1586      * Otherwise, destroy it. */
1587     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1588     {
1589         ENTER_GL();
1590         glDeleteTextures(1, &surface->texture_name);
1591         surface->texture_name = 0;
1592         glDeleteTextures(1, &surface->texture_name_srgb);
1593         surface->texture_name_srgb = 0;
1594         LEAVE_GL();
1595     }
1596
1597     context_release(context);
1598
1599     resource_unload(resource);
1600 }
1601
/* Resource callbacks for surfaces; the only entry is the unload handler. */
static const struct wined3d_resource_ops surface_resource_ops =
{
    surface_unload,
};
1606
/* Surface operation table using the non-GDI surface_*() implementations.
 * The initializer is positional, so the order of the entries must match the
 * member order of struct wined3d_surface_ops. */
static const struct wined3d_surface_ops surface_ops =
{
    surface_private_setup,
    surface_cleanup,
    surface_realize_palette,
    surface_draw_overlay,
    surface_preload,
    surface_map,
    surface_unmap,
    surface_getdc,
    surface_flip,
    surface_blt,
    surface_bltfast,
    surface_set_mem,
};
1622
1623 /*****************************************************************************
1624  * Initializes the GDI surface, aka creates the DIB section we render to
1625  * The DIB section creation is done by calling GetDC, which will create the
1626  * section and releasing the dc to allow the app to use it. The dib section
1627  * will stay until the surface is released
1628  *
1629  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1630  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1631  * avoid confusion in the shared surface code.
1632  *
1633  * Returns:
1634  *  WINED3D_OK on success
1635  *  The return values of called methods on failure
1636  *
1637  *****************************************************************************/
1638 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1639 {
1640     HRESULT hr;
1641
1642     TRACE("surface %p.\n", surface);
1643
1644     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1645     {
1646         ERR("Overlays not yet supported by GDI surfaces.\n");
1647         return WINED3DERR_INVALIDCALL;
1648     }
1649
1650     /* Sysmem textures have memory already allocated - release it,
1651      * this avoids an unnecessary memcpy. */
1652     hr = surface_create_dib_section(surface);
1653     if (SUCCEEDED(hr))
1654     {
1655         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1656         surface->resource.heapMemory = NULL;
1657         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1658     }
1659
1660     /* We don't mind the nonpow2 stuff in GDI. */
1661     surface->pow2Width = surface->resource.width;
1662     surface->pow2Height = surface->resource.height;
1663
1664     return WINED3D_OK;
1665 }
1666
1667 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1668 {
1669     TRACE("surface %p.\n", surface);
1670
1671     if (surface->flags & SFLAG_DIBSECTION)
1672     {
1673         /* Release the DC. */
1674         SelectObject(surface->hDC, surface->dib.holdbitmap);
1675         DeleteDC(surface->hDC);
1676         /* Release the DIB section. */
1677         DeleteObject(surface->dib.DIBsection);
1678         surface->dib.bitmap_data = NULL;
1679         surface->resource.allocatedMemory = NULL;
1680     }
1681
1682     if (surface->flags & SFLAG_USERPTR)
1683         wined3d_surface_set_mem(surface, NULL);
1684     if (surface->overlay_dest)
1685         list_remove(&surface->overlay_entry);
1686
1687     HeapFree(GetProcessHeap(), 0, surface->palette9);
1688
1689     resource_cleanup(&surface->resource);
1690 }
1691
1692 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1693 {
1694     struct wined3d_palette *palette = surface->palette;
1695
1696     TRACE("surface %p.\n", surface);
1697
1698     if (!palette) return;
1699
1700     if (surface->flags & SFLAG_DIBSECTION)
1701     {
1702         RGBQUAD col[256];
1703         unsigned int i;
1704
1705         TRACE("Updating the DC's palette.\n");
1706
1707         for (i = 0; i < 256; ++i)
1708         {
1709             col[i].rgbRed = palette->palents[i].peRed;
1710             col[i].rgbGreen = palette->palents[i].peGreen;
1711             col[i].rgbBlue = palette->palents[i].peBlue;
1712             col[i].rgbReserved = 0;
1713         }
1714         SetDIBColorTable(surface->hDC, 0, 256, col);
1715     }
1716
1717     /* Update the image because of the palette change. Some games like e.g.
1718      * Red Alert call SetEntries a lot to implement fading. */
1719     /* Tell the swapchain to update the screen. */
1720     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1721     {
1722         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1723         if (surface == swapchain->front_buffer)
1724         {
1725             x11_copy_to_screen(swapchain, NULL);
1726         }
1727     }
1728 }
1729
1730 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
1731 {
1732     FIXME("GDI surfaces can't draw overlays yet.\n");
1733     return E_FAIL;
1734 }
1735
/* Preloading is not supported for GDI surfaces; this only logs an error. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);
    ERR("Preloading GDI surfaces is not supported.\n");
}
1742
1743 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1744 {
1745     TRACE("surface %p, rect %s, flags %#x.\n",
1746             surface, wine_dbgstr_rect(rect), flags);
1747
1748     if (!surface->resource.allocatedMemory)
1749     {
1750         /* This happens on gdi surfaces if the application set a user pointer
1751          * and resets it. Recreate the DIB section. */
1752         surface_create_dib_section(surface);
1753         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1754     }
1755 }
1756
1757 static void gdi_surface_unmap(struct wined3d_surface *surface)
1758 {
1759     TRACE("surface %p.\n", surface);
1760
1761     /* Tell the swapchain to update the screen. */
1762     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1763     {
1764         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1765         if (surface == swapchain->front_buffer)
1766         {
1767             x11_copy_to_screen(swapchain, &surface->lockedRect);
1768         }
1769     }
1770
1771     memset(&surface->lockedRect, 0, sizeof(RECT));
1772 }
1773
1774 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
1775 {
1776     WINED3DLOCKED_RECT lock;
1777     HRESULT hr;
1778
1779     TRACE("surface %p.\n", surface);
1780
1781     /* Should have a DIB section already. */
1782     if (!(surface->flags & SFLAG_DIBSECTION))
1783     {
1784         WARN("DC not supported on this surface\n");
1785         return WINED3DERR_INVALIDCALL;
1786     }
1787
1788     /* Map the surface. */
1789     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1790     if (FAILED(hr))
1791         ERR("Map failed, hr %#x.\n", hr);
1792
1793     return hr;
1794 }
1795
1796 static HRESULT gdi_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1797 {
1798     TRACE("surface %p, override %p.\n", surface, override);
1799
1800     return WINED3D_OK;
1801 }
1802
1803 static HRESULT gdi_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
1804         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
1805         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1806 {
1807     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1808             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
1809             flags, fx, debug_d3dtexturefiltertype(filter));
1810
1811     return surface_cpu_blt(dst_surface, dst_rect, src_surface, src_rect, flags, fx, filter);
1812 }
1813
1814 static HRESULT gdi_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1815         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
1816 {
1817     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
1818             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
1819
1820     return surface_cpu_bltfast(dst_surface, dst_x, dst_y, src_surface, src_rect, trans);
1821 }
1822
1823 static HRESULT gdi_surface_set_mem(struct wined3d_surface *surface, void *mem)
1824 {
1825     TRACE("surface %p, mem %p.\n", surface, mem);
1826
1827     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
1828     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1829     {
1830         ERR("Not supported on render targets.\n");
1831         return WINED3DERR_INVALIDCALL;
1832     }
1833
1834     if (mem && mem != surface->resource.allocatedMemory)
1835     {
1836         void *release = NULL;
1837
1838         /* Do I have to copy the old surface content? */
1839         if (surface->flags & SFLAG_DIBSECTION)
1840         {
1841             SelectObject(surface->hDC, surface->dib.holdbitmap);
1842             DeleteDC(surface->hDC);
1843             /* Release the DIB section. */
1844             DeleteObject(surface->dib.DIBsection);
1845             surface->dib.bitmap_data = NULL;
1846             surface->resource.allocatedMemory = NULL;
1847             surface->hDC = NULL;
1848             surface->flags &= ~SFLAG_DIBSECTION;
1849         }
1850         else if (!(surface->flags & SFLAG_USERPTR))
1851         {
1852             release = surface->resource.allocatedMemory;
1853         }
1854         surface->resource.allocatedMemory = mem;
1855         surface->flags |= SFLAG_USERPTR | SFLAG_INSYSMEM;
1856
1857         /* Now free the old memory, if any. */
1858         HeapFree(GetProcessHeap(), 0, release);
1859     }
1860     else if (surface->flags & SFLAG_USERPTR)
1861     {
1862         /* Map() and GetDC() will re-create the dib section and allocated memory. */
1863         surface->resource.allocatedMemory = NULL;
1864         surface->flags &= ~SFLAG_USERPTR;
1865     }
1866
1867     return WINED3D_OK;
1868 }
1869
/* Surface operation table for GDI surfaces. The initializer is positional
 * and lists the GDI hooks in the same order as surface_ops lists the
 * regular ones. */
static const struct wined3d_surface_ops gdi_surface_ops =
{
    gdi_surface_private_setup,
    surface_gdi_cleanup,
    gdi_surface_realize_palette,
    gdi_surface_draw_overlay,
    gdi_surface_preload,
    gdi_surface_map,
    gdi_surface_unmap,
    gdi_surface_getdc,
    gdi_surface_flip,
    gdi_surface_blt,
    gdi_surface_bltfast,
    gdi_surface_set_mem,
};
1885
1886 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
1887 {
1888     GLuint *name;
1889     DWORD flag;
1890
1891     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
1892
1893     if(srgb)
1894     {
1895         name = &surface->texture_name_srgb;
1896         flag = SFLAG_INSRGBTEX;
1897     }
1898     else
1899     {
1900         name = &surface->texture_name;
1901         flag = SFLAG_INTEXTURE;
1902     }
1903
1904     if (!*name && new_name)
1905     {
1906         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
1907          * surface has no texture name yet. See if we can get rid of this. */
1908         if (surface->flags & flag)
1909             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
1910         surface_modify_location(surface, flag, FALSE);
1911     }
1912
1913     *name = new_name;
1914     surface_force_reload(surface);
1915 }
1916
1917 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
1918 {
1919     TRACE("surface %p, target %#x.\n", surface, target);
1920
1921     if (surface->texture_target != target)
1922     {
1923         if (target == GL_TEXTURE_RECTANGLE_ARB)
1924         {
1925             surface->flags &= ~SFLAG_NORMCOORD;
1926         }
1927         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
1928         {
1929             surface->flags |= SFLAG_NORMCOORD;
1930         }
1931     }
1932     surface->texture_target = target;
1933     surface_force_reload(surface);
1934 }
1935
1936 /* Context activation is done by the caller. */
1937 void surface_bind(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
1938 {
1939     TRACE("surface %p, gl_info %p, srgb %#x.\n", surface, gl_info, srgb);
1940
1941     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
1942     {
1943         struct wined3d_texture *texture = surface->container.u.texture;
1944
1945         TRACE("Passing to container (%p).\n", texture);
1946         texture->texture_ops->texture_bind(texture, gl_info, srgb);
1947     }
1948     else
1949     {
1950         if (surface->texture_level)
1951         {
1952             ERR("Standalone surface %p is non-zero texture level %u.\n",
1953                     surface, surface->texture_level);
1954         }
1955
1956         if (srgb)
1957             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
1958
1959         ENTER_GL();
1960
1961         if (!surface->texture_name)
1962         {
1963             glGenTextures(1, &surface->texture_name);
1964             checkGLcall("glGenTextures");
1965
1966             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
1967
1968             glBindTexture(surface->texture_target, surface->texture_name);
1969             checkGLcall("glBindTexture");
1970             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
1971             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
1972             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
1973             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1974             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1975             checkGLcall("glTexParameteri");
1976         }
1977         else
1978         {
1979             glBindTexture(surface->texture_target, surface->texture_name);
1980             checkGLcall("glBindTexture");
1981         }
1982
1983         LEAVE_GL();
1984     }
1985 }
1986
1987 /* This function checks if the primary render target uses the 8bit paletted format. */
1988 static BOOL primary_render_target_is_p8(struct wined3d_device *device)
1989 {
1990     if (device->render_targets && device->render_targets[0])
1991     {
1992         struct wined3d_surface *render_target = device->render_targets[0];
1993         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1994                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1995             return TRUE;
1996     }
1997     return FALSE;
1998 }
1999
2000 /* This call just downloads data, the caller is responsible for binding the
2001  * correct texture. */
2002 /* Context activation is done by the caller. */
2003 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2004 {
2005     const struct wined3d_format *format = surface->resource.format;
2006
2007     /* Only support read back of converted P8 surfaces. */
2008     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2009     {
2010         FIXME("Readback conversion not supported for format %s.\n", debug_d3dformat(format->id));
2011         return;
2012     }
2013
2014     ENTER_GL();
2015
2016     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2017     {
2018         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2019                 surface, surface->texture_level, format->glFormat, format->glType,
2020                 surface->resource.allocatedMemory);
2021
2022         if (surface->flags & SFLAG_PBO)
2023         {
2024             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2025             checkGLcall("glBindBufferARB");
2026             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2027             checkGLcall("glGetCompressedTexImageARB");
2028             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2029             checkGLcall("glBindBufferARB");
2030         }
2031         else
2032         {
2033             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2034                     surface->texture_level, surface->resource.allocatedMemory));
2035             checkGLcall("glGetCompressedTexImageARB");
2036         }
2037
2038         LEAVE_GL();
2039     }
2040     else
2041     {
2042         void *mem;
2043         GLenum gl_format = format->glFormat;
2044         GLenum gl_type = format->glType;
2045         int src_pitch = 0;
2046         int dst_pitch = 0;
2047
2048         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2049         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2050         {
2051             gl_format = GL_ALPHA;
2052             gl_type = GL_UNSIGNED_BYTE;
2053         }
2054
2055         if (surface->flags & SFLAG_NONPOW2)
2056         {
2057             unsigned char alignment = surface->resource.device->surface_alignment;
2058             src_pitch = format->byte_count * surface->pow2Width;
2059             dst_pitch = wined3d_surface_get_pitch(surface);
2060             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2061             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2062         }
2063         else
2064         {
2065             mem = surface->resource.allocatedMemory;
2066         }
2067
2068         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2069                 surface, surface->texture_level, gl_format, gl_type, mem);
2070
2071         if (surface->flags & SFLAG_PBO)
2072         {
2073             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2074             checkGLcall("glBindBufferARB");
2075
2076             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2077             checkGLcall("glGetTexImage");
2078
2079             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2080             checkGLcall("glBindBufferARB");
2081         }
2082         else
2083         {
2084             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2085             checkGLcall("glGetTexImage");
2086         }
2087         LEAVE_GL();
2088
2089         if (surface->flags & SFLAG_NONPOW2)
2090         {
2091             const BYTE *src_data;
2092             BYTE *dst_data;
2093             UINT y;
2094             /*
2095              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2096              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2097              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2098              *
2099              * We're doing this...
2100              *
2101              * instead of boxing the texture :
2102              * |<-texture width ->|  -->pow2width|   /\
2103              * |111111111111111111|              |   |
2104              * |222 Texture 222222| boxed empty  | texture height
2105              * |3333 Data 33333333|              |   |
2106              * |444444444444444444|              |   \/
2107              * -----------------------------------   |
2108              * |     boxed  empty | boxed empty  | pow2height
2109              * |                  |              |   \/
2110              * -----------------------------------
2111              *
2112              *
2113              * we're repacking the data to the expected texture width
2114              *
2115              * |<-texture width ->|  -->pow2width|   /\
2116              * |111111111111111111222222222222222|   |
2117              * |222333333333333333333444444444444| texture height
2118              * |444444                           |   |
2119              * |                                 |   \/
2120              * |                                 |   |
2121              * |            empty                | pow2height
2122              * |                                 |   \/
2123              * -----------------------------------
2124              *
2125              * == is the same as
2126              *
2127              * |<-texture width ->|    /\
2128              * |111111111111111111|
2129              * |222222222222222222|texture height
2130              * |333333333333333333|
2131              * |444444444444444444|    \/
2132              * --------------------
2133              *
2134              * this also means that any references to allocatedMemory should work with the data as if were a
2135              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2136              *
2137              * internally the texture is still stored in a boxed format so any references to textureName will
2138              * get a boxed texture with width pow2width and not a texture of width resource.width.
2139              *
2140              * Performance should not be an issue, because applications normally do not lock the surfaces when
2141              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2142              * and doesn't have to be re-read. */
2143             src_data = mem;
2144             dst_data = surface->resource.allocatedMemory;
2145             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2146             for (y = 1; y < surface->resource.height; ++y)
2147             {
2148                 /* skip the first row */
2149                 src_data += src_pitch;
2150                 dst_data += dst_pitch;
2151                 memcpy(dst_data, src_data, dst_pitch);
2152             }
2153
2154             HeapFree(GetProcessHeap(), 0, mem);
2155         }
2156     }
2157
2158     /* Surface has now been downloaded */
2159     surface->flags |= SFLAG_INSYSMEM;
2160 }
2161
2162 /* This call just uploads data, the caller is responsible for binding the
2163  * correct texture. */
2164 /* Context activation is done by the caller. */
2165 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2166         const struct wined3d_format *format, BOOL srgb, const GLvoid *data)
2167 {
2168     GLsizei width = surface->resource.width;
2169     GLsizei height = surface->resource.height;
2170     GLenum internal;
2171
2172     if (srgb)
2173     {
2174         internal = format->glGammaInternal;
2175     }
2176     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2177     {
2178         internal = format->rtInternal;
2179     }
2180     else
2181     {
2182         internal = format->glInternal;
2183     }
2184
2185     TRACE("surface %p, internal %#x, width %d, height %d, format %#x, type %#x, data %p.\n",
2186             surface, internal, width, height, format->glFormat, format->glType, data);
2187     TRACE("target %#x, level %u, resource size %u.\n",
2188             surface->texture_target, surface->texture_level, surface->resource.size);
2189
2190     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2191
2192     ENTER_GL();
2193
2194     if (surface->flags & SFLAG_PBO)
2195     {
2196         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
2197         checkGLcall("glBindBufferARB");
2198
2199         TRACE("(%p) pbo: %#x, data: %p.\n", surface, surface->pbo, data);
2200         data = NULL;
2201     }
2202
2203     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2204     {
2205         TRACE("Calling glCompressedTexSubImage2DARB.\n");
2206
2207         GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2208                 0, 0, width, height, internal, surface->resource.size, data));
2209         checkGLcall("glCompressedTexSubImage2DARB");
2210     }
2211     else
2212     {
2213         TRACE("Calling glTexSubImage2D.\n");
2214
2215         glTexSubImage2D(surface->texture_target, surface->texture_level,
2216                 0, 0, width, height, format->glFormat, format->glType, data);
2217         checkGLcall("glTexSubImage2D");
2218     }
2219
2220     if (surface->flags & SFLAG_PBO)
2221     {
2222         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2223         checkGLcall("glBindBufferARB");
2224     }
2225
2226     LEAVE_GL();
2227
2228     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2229     {
2230         struct wined3d_device *device = surface->resource.device;
2231         unsigned int i;
2232
2233         for (i = 0; i < device->context_count; ++i)
2234         {
2235             context_surface_update(device->contexts[i], surface);
2236         }
2237     }
2238 }
2239
2240 /* This call just allocates the texture, the caller is responsible for binding
2241  * the correct texture. */
2242 /* Context activation is done by the caller. */
2243 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2244         const struct wined3d_format *format, BOOL srgb)
2245 {
2246     BOOL enable_client_storage = FALSE;
2247     GLsizei width = surface->pow2Width;
2248     GLsizei height = surface->pow2Height;
2249     const BYTE *mem = NULL;
2250     GLenum internal;
2251
2252     if (srgb)
2253     {
2254         internal = format->glGammaInternal;
2255     }
2256     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2257     {
2258         internal = format->rtInternal;
2259     }
2260     else
2261     {
2262         internal = format->glInternal;
2263     }
2264
2265     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2266
2267     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2268             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2269             internal, width, height, format->glFormat, format->glType);
2270
2271     ENTER_GL();
2272
2273     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2274     {
2275         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2276                 || !surface->resource.allocatedMemory)
2277         {
2278             /* In some cases we want to disable client storage.
2279              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2280              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2281              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2282              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2283              */
2284             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2285             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2286             surface->flags &= ~SFLAG_CLIENT;
2287             enable_client_storage = TRUE;
2288         }
2289         else
2290         {
2291             surface->flags |= SFLAG_CLIENT;
2292
2293             /* Point OpenGL to our allocated texture memory. Do not use
2294              * resource.allocatedMemory here because it might point into a
2295              * PBO. Instead use heapMemory, but get the alignment right. */
2296             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2297                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2298         }
2299     }
2300
2301     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2302     {
2303         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2304                 internal, width, height, 0, surface->resource.size, mem));
2305         checkGLcall("glCompressedTexImage2DARB");
2306     }
2307     else
2308     {
2309         glTexImage2D(surface->texture_target, surface->texture_level,
2310                 internal, width, height, 0, format->glFormat, format->glType, mem);
2311         checkGLcall("glTexImage2D");
2312     }
2313
2314     if(enable_client_storage) {
2315         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2316         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2317     }
2318     LEAVE_GL();
2319 }
2320
2321 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2322  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2323 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2324 /* GL locking is done by the caller */
2325 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, struct wined3d_surface *rt)
2326 {
2327     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2328     struct wined3d_renderbuffer_entry *entry;
2329     GLuint renderbuffer = 0;
2330     unsigned int src_width, src_height;
2331     unsigned int width, height;
2332
2333     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2334     {
2335         width = rt->pow2Width;
2336         height = rt->pow2Height;
2337     }
2338     else
2339     {
2340         width = surface->pow2Width;
2341         height = surface->pow2Height;
2342     }
2343
2344     src_width = surface->pow2Width;
2345     src_height = surface->pow2Height;
2346
2347     /* A depth stencil smaller than the render target is not valid */
2348     if (width > src_width || height > src_height) return;
2349
2350     /* Remove any renderbuffer set if the sizes match */
2351     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2352             || (width == src_width && height == src_height))
2353     {
2354         surface->current_renderbuffer = NULL;
2355         return;
2356     }
2357
2358     /* Look if we've already got a renderbuffer of the correct dimensions */
2359     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2360     {
2361         if (entry->width == width && entry->height == height)
2362         {
2363             renderbuffer = entry->id;
2364             surface->current_renderbuffer = entry;
2365             break;
2366         }
2367     }
2368
2369     if (!renderbuffer)
2370     {
2371         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2372         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2373         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2374                 surface->resource.format->glInternal, width, height);
2375
2376         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2377         entry->width = width;
2378         entry->height = height;
2379         entry->id = renderbuffer;
2380         list_add_head(&surface->renderbuffers, &entry->entry);
2381
2382         surface->current_renderbuffer = entry;
2383     }
2384
2385     checkGLcall("set_compatible_renderbuffer");
2386 }
2387
2388 GLenum surface_get_gl_buffer(struct wined3d_surface *surface)
2389 {
2390     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2391
2392     TRACE("surface %p.\n", surface);
2393
2394     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2395     {
2396         ERR("Surface %p is not on a swapchain.\n", surface);
2397         return GL_NONE;
2398     }
2399
2400     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2401     {
2402         if (swapchain->render_to_fbo)
2403         {
2404             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2405             return GL_COLOR_ATTACHMENT0;
2406         }
2407         TRACE("Returning GL_BACK\n");
2408         return GL_BACK;
2409     }
2410     else if (surface == swapchain->front_buffer)
2411     {
2412         TRACE("Returning GL_FRONT\n");
2413         return GL_FRONT;
2414     }
2415
2416     FIXME("Higher back buffer, returning GL_BACK\n");
2417     return GL_BACK;
2418 }
2419
2420 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2421 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2422 {
2423     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2424
2425     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2426         /* No partial locking for textures yet. */
2427         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2428
2429     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2430     if (dirty_rect)
2431     {
2432         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2433         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2434         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2435         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2436     }
2437     else
2438     {
2439         surface->dirtyRect.left = 0;
2440         surface->dirtyRect.top = 0;
2441         surface->dirtyRect.right = surface->resource.width;
2442         surface->dirtyRect.bottom = surface->resource.height;
2443     }
2444
2445     /* if the container is a texture then mark it dirty. */
2446     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2447     {
2448         TRACE("Passing to container.\n");
2449         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2450     }
2451 }
2452
2453 static BOOL surface_convert_color_to_float(struct wined3d_surface *surface,
2454         DWORD color, WINED3DCOLORVALUE *float_color)
2455 {
2456     const struct wined3d_format *format = surface->resource.format;
2457     struct wined3d_device *device = surface->resource.device;
2458
2459     switch (format->id)
2460     {
2461         case WINED3DFMT_P8_UINT:
2462             if (surface->palette)
2463             {
2464                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
2465                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
2466                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
2467             }
2468             else
2469             {
2470                 float_color->r = 0.0f;
2471                 float_color->g = 0.0f;
2472                 float_color->b = 0.0f;
2473             }
2474             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
2475             break;
2476
2477         case WINED3DFMT_B5G6R5_UNORM:
2478             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
2479             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
2480             float_color->b = (color & 0x1f) / 31.0f;
2481             float_color->a = 1.0f;
2482             break;
2483
2484         case WINED3DFMT_B8G8R8_UNORM:
2485         case WINED3DFMT_B8G8R8X8_UNORM:
2486             float_color->r = D3DCOLOR_R(color);
2487             float_color->g = D3DCOLOR_G(color);
2488             float_color->b = D3DCOLOR_B(color);
2489             float_color->a = 1.0f;
2490             break;
2491
2492         case WINED3DFMT_B8G8R8A8_UNORM:
2493             float_color->r = D3DCOLOR_R(color);
2494             float_color->g = D3DCOLOR_G(color);
2495             float_color->b = D3DCOLOR_B(color);
2496             float_color->a = D3DCOLOR_A(color);
2497             break;
2498
2499         default:
2500             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
2501             return FALSE;
2502     }
2503
2504     return TRUE;
2505 }
2506
2507 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2508 {
2509     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2510
2511     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2512
2513     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2514     {
2515         ERR("Not supported on scratch surfaces.\n");
2516         return WINED3DERR_INVALIDCALL;
2517     }
2518
2519     if (!(surface->flags & flag))
2520     {
2521         TRACE("Reloading because surface is dirty\n");
2522     }
2523     /* Reload if either the texture and sysmem have different ideas about the
2524      * color key, or the actual key values changed. */
2525     else if (!(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2526             || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2527             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2528             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2529     {
2530         TRACE("Reloading because of color keying\n");
2531         /* To perform the color key conversion we need a sysmem copy of
2532          * the surface. Make sure we have it. */
2533
2534         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2535         /* Make sure the texture is reloaded because of the color key change,
2536          * this kills performance though :( */
2537         /* TODO: This is not necessarily needed with hw palettized texture support. */
2538         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2539     }
2540     else
2541     {
2542         TRACE("surface is already in texture\n");
2543         return WINED3D_OK;
2544     }
2545
2546     /* No partial locking for textures yet. */
2547     surface_load_location(surface, flag, NULL);
2548     surface_evict_sysmem(surface);
2549
2550     return WINED3D_OK;
2551 }
2552
/* Convert a 32 bit float to a 16 bit half float (1 sign bit, 5 exponent bits
 * with excess 15, 10 mantissa bits).
 *
 * in: pointer to the value to convert.
 *
 * Returns the half float bit pattern. Zero yields 0x0000, NaN yields 0x7c01,
 * infinities and values too large for a half float yield +/- infinity
 * (0x7c00 / 0xfc00), and values too small for a denormalized half float
 * yield (signed) zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The value is scaled into [2^10, 2^11), so that its integer part is the
     * 10 bit mantissa plus the implicit leading one. */
    static const float mantissa_lo = 1024.0f; /* 2^10 */
    static const float mantissa_hi = 2048.0f; /* 2^11 */
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    if (tmp < mantissa_lo)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < mantissa_lo);
    }
    else if (tmp >= mantissa_hi)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= mantissa_hi);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up 2047.5 and above carries into the next power of two.
     * Renormalize, or the "& 0x3ff" below would silently drop the carry and
     * halve the result (e.g. 0.9999 would be encoded as 0.5). */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2615
2616 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2617 {
2618     ULONG refcount;
2619
2620     TRACE("Surface %p, container %p of type %#x.\n",
2621             surface, surface->container.u.base, surface->container.type);
2622
2623     switch (surface->container.type)
2624     {
2625         case WINED3D_CONTAINER_TEXTURE:
2626             return wined3d_texture_incref(surface->container.u.texture);
2627
2628         case WINED3D_CONTAINER_SWAPCHAIN:
2629             return wined3d_swapchain_incref(surface->container.u.swapchain);
2630
2631         default:
2632             ERR("Unhandled container type %#x.\n", surface->container.type);
2633         case WINED3D_CONTAINER_NONE:
2634             break;
2635     }
2636
2637     refcount = InterlockedIncrement(&surface->resource.ref);
2638     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2639
2640     return refcount;
2641 }
2642
2643 /* Do not call while under the GL lock. */
2644 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2645 {
2646     ULONG refcount;
2647
2648     TRACE("Surface %p, container %p of type %#x.\n",
2649             surface, surface->container.u.base, surface->container.type);
2650
2651     switch (surface->container.type)
2652     {
2653         case WINED3D_CONTAINER_TEXTURE:
2654             return wined3d_texture_decref(surface->container.u.texture);
2655
2656         case WINED3D_CONTAINER_SWAPCHAIN:
2657             return wined3d_swapchain_decref(surface->container.u.swapchain);
2658
2659         default:
2660             ERR("Unhandled container type %#x.\n", surface->container.type);
2661         case WINED3D_CONTAINER_NONE:
2662             break;
2663     }
2664
2665     refcount = InterlockedDecrement(&surface->resource.ref);
2666     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2667
2668     if (!refcount)
2669     {
2670         surface->surface_ops->surface_cleanup(surface);
2671         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2672
2673         TRACE("Destroyed surface %p.\n", surface);
2674         HeapFree(GetProcessHeap(), 0, surface);
2675     }
2676
2677     return refcount;
2678 }
2679
2680 HRESULT CDECL wined3d_surface_set_private_data(struct wined3d_surface *surface,
2681         REFGUID riid, const void *data, DWORD data_size, DWORD flags)
2682 {
2683     return resource_set_private_data(&surface->resource, riid, data, data_size, flags);
2684 }
2685
2686 HRESULT CDECL wined3d_surface_get_private_data(const struct wined3d_surface *surface,
2687         REFGUID guid, void *data, DWORD *data_size)
2688 {
2689     return resource_get_private_data(&surface->resource, guid, data, data_size);
2690 }
2691
2692 HRESULT CDECL wined3d_surface_free_private_data(struct wined3d_surface *surface, REFGUID refguid)
2693 {
2694     return resource_free_private_data(&surface->resource, refguid);
2695 }
2696
2697 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2698 {
2699     return resource_set_priority(&surface->resource, priority);
2700 }
2701
2702 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2703 {
2704     return resource_get_priority(&surface->resource);
2705 }
2706
2707 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2708 {
2709     TRACE("surface %p.\n", surface);
2710
2711     surface->surface_ops->surface_preload(surface);
2712 }
2713
2714 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2715 {
2716     TRACE("surface %p.\n", surface);
2717
2718     return surface->resource.parent;
2719 }
2720
2721 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2722 {
2723     TRACE("surface %p.\n", surface);
2724
2725     return &surface->resource;
2726 }
2727
2728 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2729 {
2730     TRACE("surface %p, flags %#x.\n", surface, flags);
2731
2732     switch (flags)
2733     {
2734         case WINEDDGBS_CANBLT:
2735         case WINEDDGBS_ISBLTDONE:
2736             return WINED3D_OK;
2737
2738         default:
2739             return WINED3DERR_INVALIDCALL;
2740     }
2741 }
2742
2743 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2744 {
2745     TRACE("surface %p, flags %#x.\n", surface, flags);
2746
2747     /* XXX: DDERR_INVALIDSURFACETYPE */
2748
2749     switch (flags)
2750     {
2751         case WINEDDGFS_CANFLIP:
2752         case WINEDDGFS_ISFLIPDONE:
2753             return WINED3D_OK;
2754
2755         default:
2756             return WINED3DERR_INVALIDCALL;
2757     }
2758 }
2759
2760 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2761 {
2762     TRACE("surface %p.\n", surface);
2763
2764     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2765     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2766 }
2767
2768 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2769 {
2770     TRACE("surface %p.\n", surface);
2771
2772     /* So far we don't lose anything :) */
2773     surface->flags &= ~SFLAG_LOST;
2774     return WINED3D_OK;
2775 }
2776
2777 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2778 {
2779     TRACE("surface %p, palette %p.\n", surface, palette);
2780
2781     if (surface->palette == palette)
2782     {
2783         TRACE("Nop palette change.\n");
2784         return WINED3D_OK;
2785     }
2786
2787     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2788         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2789
2790     surface->palette = palette;
2791
2792     if (palette)
2793     {
2794         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2795             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2796
2797         surface->surface_ops->surface_realize_palette(surface);
2798     }
2799
2800     return WINED3D_OK;
2801 }
2802
2803 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2804         DWORD flags, const WINEDDCOLORKEY *color_key)
2805 {
2806     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
2807
2808     if (flags & WINEDDCKEY_COLORSPACE)
2809     {
2810         FIXME(" colorkey value not supported (%08x) !\n", flags);
2811         return WINED3DERR_INVALIDCALL;
2812     }
2813
2814     /* Dirtify the surface, but only if a key was changed. */
2815     if (color_key)
2816     {
2817         switch (flags & ~WINEDDCKEY_COLORSPACE)
2818         {
2819             case WINEDDCKEY_DESTBLT:
2820                 surface->DestBltCKey = *color_key;
2821                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
2822                 break;
2823
2824             case WINEDDCKEY_DESTOVERLAY:
2825                 surface->DestOverlayCKey = *color_key;
2826                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
2827                 break;
2828
2829             case WINEDDCKEY_SRCOVERLAY:
2830                 surface->SrcOverlayCKey = *color_key;
2831                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
2832                 break;
2833
2834             case WINEDDCKEY_SRCBLT:
2835                 surface->SrcBltCKey = *color_key;
2836                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
2837                 break;
2838         }
2839     }
2840     else
2841     {
2842         switch (flags & ~WINEDDCKEY_COLORSPACE)
2843         {
2844             case WINEDDCKEY_DESTBLT:
2845                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
2846                 break;
2847
2848             case WINEDDCKEY_DESTOVERLAY:
2849                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
2850                 break;
2851
2852             case WINEDDCKEY_SRCOVERLAY:
2853                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
2854                 break;
2855
2856             case WINEDDCKEY_SRCBLT:
2857                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
2858                 break;
2859         }
2860     }
2861
2862     return WINED3D_OK;
2863 }
2864
2865 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
2866 {
2867     TRACE("surface %p.\n", surface);
2868
2869     return surface->palette;
2870 }
2871
2872 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
2873 {
2874     const struct wined3d_format *format = surface->resource.format;
2875     DWORD pitch;
2876
2877     TRACE("surface %p.\n", surface);
2878
2879     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
2880     {
2881         /* Since compressed formats are block based, pitch means the amount of
2882          * bytes to the next row of block rather than the next row of pixels. */
2883         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
2884         pitch = row_block_count * format->block_byte_count;
2885     }
2886     else
2887     {
2888         unsigned char alignment = surface->resource.device->surface_alignment;
2889         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
2890         pitch = (pitch + alignment - 1) & ~(alignment - 1);
2891     }
2892
2893     TRACE("Returning %u.\n", pitch);
2894
2895     return pitch;
2896 }
2897
2898 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
2899 {
2900     TRACE("surface %p, mem %p.\n", surface, mem);
2901
2902     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
2903     {
2904         WARN("Surface is locked or the DC is in use.\n");
2905         return WINED3DERR_INVALIDCALL;
2906     }
2907
2908     return surface->surface_ops->surface_set_mem(surface, mem);
2909 }
2910
2911 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
2912 {
2913     LONG w, h;
2914
2915     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
2916
2917     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2918     {
2919         WARN("Not an overlay surface.\n");
2920         return WINEDDERR_NOTAOVERLAYSURFACE;
2921     }
2922
2923     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
2924     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
2925     surface->overlay_destrect.left = x;
2926     surface->overlay_destrect.top = y;
2927     surface->overlay_destrect.right = x + w;
2928     surface->overlay_destrect.bottom = y + h;
2929
2930     surface->surface_ops->surface_draw_overlay(surface);
2931
2932     return WINED3D_OK;
2933 }
2934
2935 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
2936 {
2937     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
2938
2939     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2940     {
2941         TRACE("Not an overlay surface.\n");
2942         return WINEDDERR_NOTAOVERLAYSURFACE;
2943     }
2944
2945     if (!surface->overlay_dest)
2946     {
2947         TRACE("Overlay not visible.\n");
2948         *x = 0;
2949         *y = 0;
2950         return WINEDDERR_OVERLAYNOTVISIBLE;
2951     }
2952
2953     *x = surface->overlay_destrect.left;
2954     *y = surface->overlay_destrect.top;
2955
2956     TRACE("Returning position %d, %d.\n", *x, *y);
2957
2958     return WINED3D_OK;
2959 }
2960
2961 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
2962         DWORD flags, struct wined3d_surface *ref)
2963 {
2964     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
2965
2966     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2967     {
2968         TRACE("Not an overlay surface.\n");
2969         return WINEDDERR_NOTAOVERLAYSURFACE;
2970     }
2971
2972     return WINED3D_OK;
2973 }
2974
2975 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
2976         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
2977 {
2978     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
2979             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
2980
2981     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2982     {
2983         WARN("Not an overlay surface.\n");
2984         return WINEDDERR_NOTAOVERLAYSURFACE;
2985     }
2986     else if (!dst_surface)
2987     {
2988         WARN("Dest surface is NULL.\n");
2989         return WINED3DERR_INVALIDCALL;
2990     }
2991
2992     if (src_rect)
2993     {
2994         surface->overlay_srcrect = *src_rect;
2995     }
2996     else
2997     {
2998         surface->overlay_srcrect.left = 0;
2999         surface->overlay_srcrect.top = 0;
3000         surface->overlay_srcrect.right = surface->resource.width;
3001         surface->overlay_srcrect.bottom = surface->resource.height;
3002     }
3003
3004     if (dst_rect)
3005     {
3006         surface->overlay_destrect = *dst_rect;
3007     }
3008     else
3009     {
3010         surface->overlay_destrect.left = 0;
3011         surface->overlay_destrect.top = 0;
3012         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3013         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3014     }
3015
3016     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3017     {
3018         list_remove(&surface->overlay_entry);
3019     }
3020
3021     if (flags & WINEDDOVER_SHOW)
3022     {
3023         if (surface->overlay_dest != dst_surface)
3024         {
3025             surface->overlay_dest = dst_surface;
3026             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3027         }
3028     }
3029     else if (flags & WINEDDOVER_HIDE)
3030     {
3031         /* tests show that the rectangles are erased on hide */
3032         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3033         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3034         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3035         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3036         surface->overlay_dest = NULL;
3037     }
3038
3039     surface->surface_ops->surface_draw_overlay(surface);
3040
3041     return WINED3D_OK;
3042 }
3043
3044 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3045 {
3046     TRACE("surface %p, clipper %p.\n", surface, clipper);
3047
3048     surface->clipper = clipper;
3049
3050     return WINED3D_OK;
3051 }
3052
3053 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3054 {
3055     TRACE("surface %p.\n", surface);
3056
3057     return surface->clipper;
3058 }
3059
3060 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3061 {
3062     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3063
3064     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3065
3066     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3067     {
3068         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3069         return WINED3DERR_INVALIDCALL;
3070     }
3071
3072     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3073             surface->pow2Width, surface->pow2Height);
3074     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3075     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
3076     surface->resource.format = format;
3077
3078     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3079     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3080             format->glFormat, format->glInternal, format->glType);
3081
3082     return WINED3D_OK;
3083 }
3084
3085 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3086         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3087 {
3088     unsigned short *dst_s;
3089     const float *src_f;
3090     unsigned int x, y;
3091
3092     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3093
3094     for (y = 0; y < h; ++y)
3095     {
3096         src_f = (const float *)(src + y * pitch_in);
3097         dst_s = (unsigned short *) (dst + y * pitch_out);
3098         for (x = 0; x < w; ++x)
3099         {
3100             dst_s[x] = float_32_to_16(src_f + x);
3101         }
3102     }
3103 }
3104
3105 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3106         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3107 {
3108     static const unsigned char convert_5to8[] =
3109     {
3110         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3111         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3112         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3113         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3114     };
3115     static const unsigned char convert_6to8[] =
3116     {
3117         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3118         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3119         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3120         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3121         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3122         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3123         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3124         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3125     };
3126     unsigned int x, y;
3127
3128     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3129
3130     for (y = 0; y < h; ++y)
3131     {
3132         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3133         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3134         for (x = 0; x < w; ++x)
3135         {
3136             WORD pixel = src_line[x];
3137             dst_line[x] = 0xff000000
3138                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3139                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3140                     | convert_5to8[(pixel & 0x001f)];
3141         }
3142     }
3143 }
3144
3145 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3146         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3147 {
3148     unsigned int x, y;
3149
3150     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3151
3152     for (y = 0; y < h; ++y)
3153     {
3154         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3155         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3156
3157         for (x = 0; x < w; ++x)
3158         {
3159             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3160         }
3161     }
3162 }
3163
3164 static inline BYTE cliptobyte(int x)
3165 {
3166     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3167 }
3168
3169 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3170         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3171 {
3172     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3173     unsigned int x, y;
3174
3175     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3176
3177     for (y = 0; y < h; ++y)
3178     {
3179         const BYTE *src_line = src + y * pitch_in;
3180         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3181         for (x = 0; x < w; ++x)
3182         {
3183             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3184              *     C = Y - 16; D = U - 128; E = V - 128;
3185              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3186              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3187              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3188              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3189              * U and V are shared between the pixels. */
3190             if (!(x & 1)) /* For every even pixel, read new U and V. */
3191             {
3192                 d = (int) src_line[1] - 128;
3193                 e = (int) src_line[3] - 128;
3194                 r2 = 409 * e + 128;
3195                 g2 = - 100 * d - 208 * e + 128;
3196                 b2 = 516 * d + 128;
3197             }
3198             c2 = 298 * ((int) src_line[0] - 16);
3199             dst_line[x] = 0xff000000
3200                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3201                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3202                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3203                 /* Scale RGB values to 0..255 range,
3204                  * then clip them if still not in range (may be negative),
3205                  * then shift them within DWORD if necessary. */
3206             src_line += 2;
3207         }
3208     }
3209 }
3210
3211 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3212         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3213 {
3214     unsigned int x, y;
3215     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3216
3217     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3218
3219     for (y = 0; y < h; ++y)
3220     {
3221         const BYTE *src_line = src + y * pitch_in;
3222         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3223         for (x = 0; x < w; ++x)
3224         {
3225             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3226              *     C = Y - 16; D = U - 128; E = V - 128;
3227              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3228              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3229              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3230              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3231              * U and V are shared between the pixels. */
3232             if (!(x & 1)) /* For every even pixel, read new U and V. */
3233             {
3234                 d = (int) src_line[1] - 128;
3235                 e = (int) src_line[3] - 128;
3236                 r2 = 409 * e + 128;
3237                 g2 = - 100 * d - 208 * e + 128;
3238                 b2 = 516 * d + 128;
3239             }
3240             c2 = 298 * ((int) src_line[0] - 16);
3241             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3242                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3243                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3244                 /* Scale RGB values to 0..255 range,
3245                  * then clip them if still not in range (may be negative),
3246                  * then shift them within DWORD if necessary. */
3247             src_line += 2;
3248         }
3249     }
3250 }
3251
3252 struct d3dfmt_convertor_desc
3253 {
3254     enum wined3d_format_id from, to;
3255     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3256 };
3257
/* Table of the available software format conversions, searched linearly by
 * find_convertor(). Each row is {source format, destination format,
 * conversion routine}. */
static const struct d3dfmt_convertor_desc convertors[] =
{
    {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
    {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
    {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
};
3266
3267 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3268         enum wined3d_format_id to)
3269 {
3270     unsigned int i;
3271
3272     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3273     {
3274         if (convertors[i].from == from && convertors[i].to == to)
3275             return &convertors[i];
3276     }
3277
3278     return NULL;
3279 }
3280
3281 /*****************************************************************************
3282  * surface_convert_format
3283  *
3284  * Creates a duplicate of a surface in a different format. Is used by Blt to
3285  * blit between surfaces with different formats.
3286  *
3287  * Parameters
3288  *  source: Source surface
3289  *  fmt: Requested destination format
3290  *
3291  *****************************************************************************/
3292 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3293 {
3294     const struct d3dfmt_convertor_desc *conv;
3295     WINED3DLOCKED_RECT lock_src, lock_dst;
3296     struct wined3d_surface *ret = NULL;
3297     HRESULT hr;
3298
3299     conv = find_convertor(source->resource.format->id, to_fmt);
3300     if (!conv)
3301     {
3302         FIXME("Cannot find a conversion function from format %s to %s.\n",
3303                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3304         return NULL;
3305     }
3306
3307     wined3d_surface_create(source->resource.device, source->resource.width,
3308             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3309             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3310             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3311     if (!ret)
3312     {
3313         ERR("Failed to create a destination surface for conversion.\n");
3314         return NULL;
3315     }
3316
3317     memset(&lock_src, 0, sizeof(lock_src));
3318     memset(&lock_dst, 0, sizeof(lock_dst));
3319
3320     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3321     if (FAILED(hr))
3322     {
3323         ERR("Failed to lock the source surface.\n");
3324         wined3d_surface_decref(ret);
3325         return NULL;
3326     }
3327     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3328     if (FAILED(hr))
3329     {
3330         ERR("Failed to lock the destination surface.\n");
3331         wined3d_surface_unmap(source);
3332         wined3d_surface_decref(ret);
3333         return NULL;
3334     }
3335
3336     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3337             source->resource.width, source->resource.height);
3338
3339     wined3d_surface_unmap(ret);
3340     wined3d_surface_unmap(source);
3341
3342     return ret;
3343 }
3344
3345 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3346         unsigned int bpp, UINT pitch, DWORD color)
3347 {
3348     BYTE *first;
3349     int x, y;
3350
3351     /* Do first row */
3352
3353 #define COLORFILL_ROW(type) \
3354 do { \
3355     type *d = (type *)buf; \
3356     for (x = 0; x < width; ++x) \
3357         d[x] = (type)color; \
3358 } while(0)
3359
3360     switch (bpp)
3361     {
3362         case 1:
3363             COLORFILL_ROW(BYTE);
3364             break;
3365
3366         case 2:
3367             COLORFILL_ROW(WORD);
3368             break;
3369
3370         case 3:
3371         {
3372             BYTE *d = buf;
3373             for (x = 0; x < width; ++x, d += 3)
3374             {
3375                 d[0] = (color      ) & 0xFF;
3376                 d[1] = (color >>  8) & 0xFF;
3377                 d[2] = (color >> 16) & 0xFF;
3378             }
3379             break;
3380         }
3381         case 4:
3382             COLORFILL_ROW(DWORD);
3383             break;
3384
3385         default:
3386             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3387             return WINED3DERR_NOTAVAILABLE;
3388     }
3389
3390 #undef COLORFILL_ROW
3391
3392     /* Now copy first row. */
3393     first = buf;
3394     for (y = 1; y < height; ++y)
3395     {
3396         buf += pitch;
3397         memcpy(buf, first, width * bpp);
3398     }
3399
3400     return WINED3D_OK;
3401 }
3402
3403 /* Do not call while under the GL lock. */
3404 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
3405         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
3406         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
3407 {
3408     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
3409             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
3410             flags, fx, debug_d3dtexturefiltertype(filter));
3411
3412     return dst_surface->surface_ops->surface_blt(dst_surface,
3413             dst_rect, src_surface, src_rect, flags, fx, filter);
3414 }
3415
3416 /* Do not call while under the GL lock. */
3417 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
3418         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
3419 {
3420     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, trans %#x.\n",
3421             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
3422
3423     return dst_surface->surface_ops->surface_bltfast(dst_surface,
3424             dst_x, dst_y, src_surface, src_rect, trans);
3425 }
3426
3427 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3428 {
3429     TRACE("surface %p.\n", surface);
3430
3431     if (!(surface->flags & SFLAG_LOCKED))
3432     {
3433         WARN("Trying to unmap unmapped surface.\n");
3434         return WINEDDERR_NOTLOCKED;
3435     }
3436     surface->flags &= ~SFLAG_LOCKED;
3437
3438     surface->surface_ops->surface_unmap(surface);
3439
3440     return WINED3D_OK;
3441 }
3442
3443 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3444         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3445 {
3446     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3447             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3448
3449     if (surface->flags & SFLAG_LOCKED)
3450     {
3451         WARN("Surface is already mapped.\n");
3452         return WINED3DERR_INVALIDCALL;
3453     }
3454     surface->flags |= SFLAG_LOCKED;
3455
3456     if (!(surface->flags & SFLAG_LOCKABLE))
3457         WARN("Trying to lock unlockable surface.\n");
3458
3459     surface->surface_ops->surface_map(surface, rect, flags);
3460
3461     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3462
3463     if (!rect)
3464     {
3465         locked_rect->pBits = surface->resource.allocatedMemory;
3466         surface->lockedRect.left = 0;
3467         surface->lockedRect.top = 0;
3468         surface->lockedRect.right = surface->resource.width;
3469         surface->lockedRect.bottom = surface->resource.height;
3470     }
3471     else
3472     {
3473         const struct wined3d_format *format = surface->resource.format;
3474
3475         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3476         {
3477             /* Compressed textures are block based, so calculate the offset of
3478              * the block that contains the top-left pixel of the locked rectangle. */
3479             locked_rect->pBits = surface->resource.allocatedMemory
3480                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3481                     + ((rect->left / format->block_width) * format->block_byte_count);
3482         }
3483         else
3484         {
3485             locked_rect->pBits = surface->resource.allocatedMemory
3486                     + (locked_rect->Pitch * rect->top)
3487                     + (rect->left * format->byte_count);
3488         }
3489         surface->lockedRect.left = rect->left;
3490         surface->lockedRect.top = rect->top;
3491         surface->lockedRect.right = rect->right;
3492         surface->lockedRect.bottom = rect->bottom;
3493     }
3494
3495     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3496     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3497
3498     return WINED3D_OK;
3499 }
3500
3501 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3502 {
3503     HRESULT hr;
3504
3505     TRACE("surface %p, dc %p.\n", surface, dc);
3506
3507     if (surface->flags & SFLAG_USERPTR)
3508     {
3509         ERR("Not supported on surfaces with application-provided memory.\n");
3510         return WINEDDERR_NODC;
3511     }
3512
3513     /* Give more detailed info for ddraw. */
3514     if (surface->flags & SFLAG_DCINUSE)
3515         return WINEDDERR_DCALREADYCREATED;
3516
3517     /* Can't GetDC if the surface is locked. */
3518     if (surface->flags & SFLAG_LOCKED)
3519         return WINED3DERR_INVALIDCALL;
3520
3521     hr = surface->surface_ops->surface_getdc(surface);
3522     if (FAILED(hr))
3523         return hr;
3524
3525     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3526             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3527     {
3528         /* GetDC on palettized formats is unsupported in D3D9, and the method
3529          * is missing in D3D8, so this should only be used for DX <=7
3530          * surfaces (with non-device palettes). */
3531         const PALETTEENTRY *pal = NULL;
3532
3533         if (surface->palette)
3534         {
3535             pal = surface->palette->palents;
3536         }
3537         else
3538         {
3539             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3540             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3541
3542             if (dds_primary && dds_primary->palette)
3543                 pal = dds_primary->palette->palents;
3544         }
3545
3546         if (pal)
3547         {
3548             RGBQUAD col[256];
3549             unsigned int i;
3550
3551             for (i = 0; i < 256; ++i)
3552             {
3553                 col[i].rgbRed = pal[i].peRed;
3554                 col[i].rgbGreen = pal[i].peGreen;
3555                 col[i].rgbBlue = pal[i].peBlue;
3556                 col[i].rgbReserved = 0;
3557             }
3558             SetDIBColorTable(surface->hDC, 0, 256, col);
3559         }
3560     }
3561
3562     surface->flags |= SFLAG_DCINUSE;
3563
3564     *dc = surface->hDC;
3565     TRACE("Returning dc %p.\n", *dc);
3566
3567     return WINED3D_OK;
3568 }
3569
3570 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3571 {
3572     TRACE("surface %p, dc %p.\n", surface, dc);
3573
3574     if (!(surface->flags & SFLAG_DCINUSE))
3575         return WINEDDERR_NODC;
3576
3577     if (surface->hDC != dc)
3578     {
3579         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3580                 dc, surface->hDC);
3581         return WINEDDERR_NODC;
3582     }
3583
3584     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3585     {
3586         /* Copy the contents of the DIB over to the PBO. */
3587         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->dib.bitmap_size);
3588     }
3589
3590     /* We locked first, so unlock now. */
3591     wined3d_surface_unmap(surface);
3592
3593     surface->flags &= ~SFLAG_DCINUSE;
3594
3595     return WINED3D_OK;
3596 }
3597
3598 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3599 {
3600     struct wined3d_swapchain *swapchain;
3601     HRESULT hr;
3602
3603     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3604
3605     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
3606     {
3607         ERR("Flipped surface is not on a swapchain.\n");
3608         return WINEDDERR_NOTFLIPPABLE;
3609     }
3610     swapchain = surface->container.u.swapchain;
3611
3612     hr = surface->surface_ops->surface_flip(surface, override);
3613     if (FAILED(hr))
3614         return hr;
3615
3616     /* Just overwrite the swapchain presentation interval. This is ok because
3617      * only ddraw apps can call Flip, and only d3d8 and d3d9 applications
3618      * specify the presentation interval. */
3619     if (!(flags & (WINEDDFLIP_NOVSYNC | WINEDDFLIP_INTERVAL2 | WINEDDFLIP_INTERVAL3 | WINEDDFLIP_INTERVAL4)))
3620         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_ONE;
3621     else if (flags & WINEDDFLIP_NOVSYNC)
3622         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
3623     else if (flags & WINEDDFLIP_INTERVAL2)
3624         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_TWO;
3625     else if (flags & WINEDDFLIP_INTERVAL3)
3626         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_THREE;
3627     else
3628         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_FOUR;
3629
3630     return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
3631 }
3632
3633 /* Do not call while under the GL lock. */
3634 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3635 {
3636     struct wined3d_device *device = surface->resource.device;
3637
3638     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3639
3640     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3641     {
3642         struct wined3d_texture *texture = surface->container.u.texture;
3643
3644         TRACE("Passing to container (%p).\n", texture);
3645         texture->texture_ops->texture_preload(texture, srgb);
3646     }
3647     else
3648     {
3649         struct wined3d_context *context = NULL;
3650
3651         TRACE("(%p) : About to load surface\n", surface);
3652
3653         if (!device->isInDraw) context = context_acquire(device, NULL);
3654
3655         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3656                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3657         {
3658             if (palette9_changed(surface))
3659             {
3660                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3661                 /* TODO: This is not necessarily needed with hw palettized texture support */
3662                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3663                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3664                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3665             }
3666         }
3667
3668         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3669
3670         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3671         {
3672             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3673             GLclampf tmp;
3674             tmp = 0.9f;
3675             ENTER_GL();
3676             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3677             LEAVE_GL();
3678         }
3679
3680         if (context) context_release(context);
3681     }
3682 }
3683
3684 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3685 {
3686     if (!surface->resource.allocatedMemory)
3687     {
3688         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3689                 surface->resource.size + RESOURCE_ALIGNMENT);
3690         if (!surface->resource.heapMemory)
3691         {
3692             ERR("Out of memory\n");
3693             return FALSE;
3694         }
3695         surface->resource.allocatedMemory =
3696             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3697     }
3698     else
3699     {
3700         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3701     }
3702
3703     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3704
3705     return TRUE;
3706 }
3707
3708 /* Read the framebuffer back into the surface */
3709 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3710 {
3711     struct wined3d_device *device = surface->resource.device;
3712     const struct wined3d_gl_info *gl_info;
3713     struct wined3d_context *context;
3714     BYTE *mem;
3715     GLint fmt;
3716     GLint type;
3717     BYTE *row, *top, *bottom;
3718     int i;
3719     BOOL bpp;
3720     RECT local_rect;
3721     BOOL srcIsUpsideDown;
3722     GLint rowLen = 0;
3723     GLint skipPix = 0;
3724     GLint skipRow = 0;
3725
3726     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3727         static BOOL warned = FALSE;
3728         if(!warned) {
3729             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3730             warned = TRUE;
3731         }
3732         return;
3733     }
3734
3735     context = context_acquire(device, surface);
3736     context_apply_blit_state(context, device);
3737     gl_info = context->gl_info;
3738
3739     ENTER_GL();
3740
3741     /* Select the correct read buffer, and give some debug output.
3742      * There is no need to keep track of the current read buffer or reset it, every part of the code
3743      * that reads sets the read buffer as desired.
3744      */
3745     if (surface_is_offscreen(surface))
3746     {
3747         /* Mapping the primary render target which is not on a swapchain.
3748          * Read from the back buffer. */
3749         TRACE("Mapping offscreen render target.\n");
3750         glReadBuffer(device->offscreenBuffer);
3751         srcIsUpsideDown = TRUE;
3752     }
3753     else
3754     {
3755         /* Onscreen surfaces are always part of a swapchain */
3756         GLenum buffer = surface_get_gl_buffer(surface);
3757         TRACE("Mapping %#x buffer.\n", buffer);
3758         glReadBuffer(buffer);
3759         checkGLcall("glReadBuffer");
3760         srcIsUpsideDown = FALSE;
3761     }
3762
3763     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3764     if (!rect)
3765     {
3766         local_rect.left = 0;
3767         local_rect.top = 0;
3768         local_rect.right = surface->resource.width;
3769         local_rect.bottom = surface->resource.height;
3770     }
3771     else
3772     {
3773         local_rect = *rect;
3774     }
3775     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3776
3777     switch (surface->resource.format->id)
3778     {
3779         case WINED3DFMT_P8_UINT:
3780         {
3781             if (primary_render_target_is_p8(device))
3782             {
3783                 /* In case of P8 render targets the index is stored in the alpha component */
3784                 fmt = GL_ALPHA;
3785                 type = GL_UNSIGNED_BYTE;
3786                 mem = dest;
3787                 bpp = surface->resource.format->byte_count;
3788             }
3789             else
3790             {
3791                 /* GL can't return palettized data, so read ARGB pixels into a
3792                  * separate block of memory and convert them into palettized format
3793                  * in software. Slow, but if the app means to use palettized render
3794                  * targets and locks it...
3795                  *
3796                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
3797                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
3798                  * for the color channels when palettizing the colors.
3799                  */
3800                 fmt = GL_RGB;
3801                 type = GL_UNSIGNED_BYTE;
3802                 pitch *= 3;
3803                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
3804                 if (!mem)
3805                 {
3806                     ERR("Out of memory\n");
3807                     LEAVE_GL();
3808                     return;
3809                 }
3810                 bpp = surface->resource.format->byte_count * 3;
3811             }
3812         }
3813         break;
3814
3815         default:
3816             mem = dest;
3817             fmt = surface->resource.format->glFormat;
3818             type = surface->resource.format->glType;
3819             bpp = surface->resource.format->byte_count;
3820     }
3821
3822     if (surface->flags & SFLAG_PBO)
3823     {
3824         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
3825         checkGLcall("glBindBufferARB");
3826         if (mem)
3827         {
3828             ERR("mem not null for pbo -- unexpected\n");
3829             mem = NULL;
3830         }
3831     }
3832
3833     /* Save old pixel store pack state */
3834     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
3835     checkGLcall("glGetIntegerv");
3836     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
3837     checkGLcall("glGetIntegerv");
3838     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
3839     checkGLcall("glGetIntegerv");
3840
3841     /* Setup pixel store pack state -- to glReadPixels into the correct place */
3842     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
3843     checkGLcall("glPixelStorei");
3844     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
3845     checkGLcall("glPixelStorei");
3846     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
3847     checkGLcall("glPixelStorei");
3848
3849     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
3850             local_rect.right - local_rect.left,
3851             local_rect.bottom - local_rect.top,
3852             fmt, type, mem);
3853     checkGLcall("glReadPixels");
3854
3855     /* Reset previous pixel store pack state */
3856     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
3857     checkGLcall("glPixelStorei");
3858     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
3859     checkGLcall("glPixelStorei");
3860     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
3861     checkGLcall("glPixelStorei");
3862
3863     if (surface->flags & SFLAG_PBO)
3864     {
3865         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
3866         checkGLcall("glBindBufferARB");
3867
3868         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
3869          * to get a pointer to it and perform the flipping in software. This is a lot
3870          * faster than calling glReadPixels for each line. In case we want more speed
3871          * we should rerender it flipped in a FBO and read the data back from the FBO. */
3872         if (!srcIsUpsideDown)
3873         {
3874             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
3875             checkGLcall("glBindBufferARB");
3876
3877             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
3878             checkGLcall("glMapBufferARB");
3879         }
3880     }
3881
3882     /* TODO: Merge this with the palettization loop below for P8 targets */
3883     if(!srcIsUpsideDown) {
3884         UINT len, off;
3885         /* glReadPixels returns the image upside down, and there is no way to prevent this.
3886             Flip the lines in software */
3887         len = (local_rect.right - local_rect.left) * bpp;
3888         off = local_rect.left * bpp;
3889
3890         row = HeapAlloc(GetProcessHeap(), 0, len);
3891         if(!row) {
3892             ERR("Out of memory\n");
3893             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
3894                 HeapFree(GetProcessHeap(), 0, mem);
3895             LEAVE_GL();
3896             return;
3897         }
3898
3899         top = mem + pitch * local_rect.top;
3900         bottom = mem + pitch * (local_rect.bottom - 1);
3901         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
3902             memcpy(row, top + off, len);
3903             memcpy(top + off, bottom + off, len);
3904             memcpy(bottom + off, row, len);
3905             top += pitch;
3906             bottom -= pitch;
3907         }
3908         HeapFree(GetProcessHeap(), 0, row);
3909
3910         /* Unmap the temp PBO buffer */
3911         if (surface->flags & SFLAG_PBO)
3912         {
3913             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
3914             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
3915         }
3916     }
3917
3918     LEAVE_GL();
3919     context_release(context);
3920
3921     /* For P8 textures we need to perform an inverse palette lookup. This is
3922      * done by searching for a palette index which matches the RGB value.
3923      * Note this isn't guaranteed to work when there are multiple entries for
3924      * the same color but we have no choice. In case of P8 render targets,
3925      * the index is stored in the alpha component so no conversion is needed. */
3926     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
3927     {
3928         const PALETTEENTRY *pal = NULL;
3929         DWORD width = pitch / 3;
3930         int x, y, c;
3931
3932         if (surface->palette)
3933         {
3934             pal = surface->palette->palents;
3935         }
3936         else
3937         {
3938             ERR("Palette is missing, cannot perform inverse palette lookup\n");
3939             HeapFree(GetProcessHeap(), 0, mem);
3940             return;
3941         }
3942
3943         for(y = local_rect.top; y < local_rect.bottom; y++) {
3944             for(x = local_rect.left; x < local_rect.right; x++) {
3945                 /*                      start              lines            pixels      */
3946                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
3947                 const BYTE *green = blue  + 1;
3948                 const BYTE *red = green + 1;
3949
3950                 for(c = 0; c < 256; c++) {
3951                     if(*red   == pal[c].peRed   &&
3952                        *green == pal[c].peGreen &&
3953                        *blue  == pal[c].peBlue)
3954                     {
3955                         *((BYTE *) dest + y * width + x) = c;
3956                         break;
3957                     }
3958                 }
3959             }
3960         }
3961         HeapFree(GetProcessHeap(), 0, mem);
3962     }
3963 }
3964
3965 /* Read the framebuffer contents into a texture */
3966 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
3967 {
3968     struct wined3d_device *device = surface->resource.device;
3969     const struct wined3d_gl_info *gl_info;
3970     struct wined3d_context *context;
3971
3972     if (!surface_is_offscreen(surface))
3973     {
3974         /* We would need to flip onscreen surfaces, but there's no efficient
3975          * way to do that here. It makes more sense for the caller to
3976          * explicitly go through sysmem. */
3977         ERR("Not supported for onscreen targets.\n");
3978         return;
3979     }
3980
3981     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
3982      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
3983      * states in the stateblock, and no driver was found yet that had bugs in that regard.
3984      */
3985     context = context_acquire(device, surface);
3986     gl_info = context->gl_info;
3987
3988     surface_prepare_texture(surface, gl_info, srgb);
3989     surface_bind_and_dirtify(surface, gl_info, srgb);
3990
3991     TRACE("Reading back offscreen render target %p.\n", surface);
3992
3993     ENTER_GL();
3994
3995     glReadBuffer(device->offscreenBuffer);
3996     checkGLcall("glReadBuffer");
3997
3998     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
3999             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4000     checkGLcall("glCopyTexSubImage2D");
4001
4002     LEAVE_GL();
4003
4004     context_release(context);
4005 }
4006
4007 /* Context activation is done by the caller. */
4008 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4009         const struct wined3d_gl_info *gl_info, BOOL srgb)
4010 {
4011     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4012     CONVERT_TYPES convert;
4013     struct wined3d_format format;
4014
4015     if (surface->flags & alloc_flag) return;
4016
4017     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4018     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4019     else surface->flags &= ~SFLAG_CONVERTED;
4020
4021     surface_bind_and_dirtify(surface, gl_info, srgb);
4022     surface_allocate_surface(surface, gl_info, &format, srgb);
4023     surface->flags |= alloc_flag;
4024 }
4025
4026 /* Context activation is done by the caller. */
4027 void surface_prepare_texture(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
4028 {
4029     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4030     {
4031         struct wined3d_texture *texture = surface->container.u.texture;
4032         UINT sub_count = texture->level_count * texture->layer_count;
4033         UINT i;
4034
4035         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4036
4037         for (i = 0; i < sub_count; ++i)
4038         {
4039             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4040             surface_prepare_texture_internal(s, gl_info, srgb);
4041         }
4042
4043         return;
4044     }
4045
4046     surface_prepare_texture_internal(surface, gl_info, srgb);
4047 }
4048
4049 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4050         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4051 {
4052     struct wined3d_device *device = surface->resource.device;
4053     UINT pitch = wined3d_surface_get_pitch(surface);
4054     const struct wined3d_gl_info *gl_info;
4055     struct wined3d_context *context;
4056     RECT local_rect;
4057     UINT w, h;
4058
4059     surface_get_rect(surface, rect, &local_rect);
4060
4061     mem += local_rect.top * pitch + local_rect.left * bpp;
4062     w = local_rect.right - local_rect.left;
4063     h = local_rect.bottom - local_rect.top;
4064
4065     /* Activate the correct context for the render target */
4066     context = context_acquire(device, surface);
4067     context_apply_blit_state(context, device);
4068     gl_info = context->gl_info;
4069
4070     ENTER_GL();
4071
4072     if (!surface_is_offscreen(surface))
4073     {
4074         GLenum buffer = surface_get_gl_buffer(surface);
4075         TRACE("Unlocking %#x buffer.\n", buffer);
4076         context_set_draw_buffer(context, buffer);
4077
4078         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4079         glPixelZoom(1.0f, -1.0f);
4080     }
4081     else
4082     {
4083         /* Primary offscreen render target */
4084         TRACE("Offscreen render target.\n");
4085         context_set_draw_buffer(context, device->offscreenBuffer);
4086
4087         glPixelZoom(1.0f, 1.0f);
4088     }
4089
4090     glRasterPos3i(local_rect.left, local_rect.top, 1);
4091     checkGLcall("glRasterPos3i");
4092
4093     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4094     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4095
4096     if (surface->flags & SFLAG_PBO)
4097     {
4098         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4099         checkGLcall("glBindBufferARB");
4100     }
4101
4102     glDrawPixels(w, h, fmt, type, mem);
4103     checkGLcall("glDrawPixels");
4104
4105     if (surface->flags & SFLAG_PBO)
4106     {
4107         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4108         checkGLcall("glBindBufferARB");
4109     }
4110
4111     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4112     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4113
4114     LEAVE_GL();
4115
4116     if (wined3d_settings.strict_draw_ordering
4117             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4118             && surface->container.u.swapchain->front_buffer == surface))
4119         wglFlush();
4120
4121     context_release(context);
4122 }
4123
4124 HRESULT d3dfmt_get_conv(struct wined3d_surface *surface, BOOL need_alpha_ck,
4125         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4126 {
4127     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4128     struct wined3d_device *device = surface->resource.device;
4129     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4130     BOOL blit_supported = FALSE;
4131
4132     /* Copy the default values from the surface. Below we might perform fixups */
4133     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4134     *format = *surface->resource.format;
4135     *convert = NO_CONVERSION;
4136
4137     /* Ok, now look if we have to do any conversion */
4138     switch (surface->resource.format->id)
4139     {
4140         case WINED3DFMT_P8_UINT:
4141             /* Below the call to blit_supported is disabled for Wine 1.2
4142              * because the function isn't operating correctly yet. At the
4143              * moment 8-bit blits are handled in software and if certain GL
4144              * extensions are around, surface conversion is performed at
4145              * upload time. The blit_supported call recognizes it as a
4146              * destination fixup. This type of upload 'fixup' and 8-bit to
4147              * 8-bit blits need to be handled by the blit_shader.
4148              * TODO: get rid of this #if 0. */
4149 #if 0
4150             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4151                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4152                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4153 #endif
4154             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4155
4156             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4157              * texturing. Further also use conversion in case of color keying.
4158              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4159              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4160              * conflicts with this.
4161              */
4162             if (!((blit_supported && device->render_targets && surface == device->render_targets[0]))
4163                     || colorkey_active || !use_texturing)
4164             {
4165                 format->glFormat = GL_RGBA;
4166                 format->glInternal = GL_RGBA;
4167                 format->glType = GL_UNSIGNED_BYTE;
4168                 format->conv_byte_count = 4;
4169                 if (colorkey_active)
4170                     *convert = CONVERT_PALETTED_CK;
4171                 else
4172                     *convert = CONVERT_PALETTED;
4173             }
4174             break;
4175
4176         case WINED3DFMT_B2G3R3_UNORM:
4177             /* **********************
4178                 GL_UNSIGNED_BYTE_3_3_2
4179                 ********************** */
4180             if (colorkey_active) {
4181                 /* This texture format will never be used.. So do not care about color keying
4182                     up until the point in time it will be needed :-) */
4183                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4184             }
4185             break;
4186
4187         case WINED3DFMT_B5G6R5_UNORM:
4188             if (colorkey_active)
4189             {
4190                 *convert = CONVERT_CK_565;
4191                 format->glFormat = GL_RGBA;
4192                 format->glInternal = GL_RGB5_A1;
4193                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4194                 format->conv_byte_count = 2;
4195             }
4196             break;
4197
4198         case WINED3DFMT_B5G5R5X1_UNORM:
4199             if (colorkey_active)
4200             {
4201                 *convert = CONVERT_CK_5551;
4202                 format->glFormat = GL_BGRA;
4203                 format->glInternal = GL_RGB5_A1;
4204                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4205                 format->conv_byte_count = 2;
4206             }
4207             break;
4208
4209         case WINED3DFMT_B8G8R8_UNORM:
4210             if (colorkey_active)
4211             {
4212                 *convert = CONVERT_CK_RGB24;
4213                 format->glFormat = GL_RGBA;
4214                 format->glInternal = GL_RGBA8;
4215                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4216                 format->conv_byte_count = 4;
4217             }
4218             break;
4219
4220         case WINED3DFMT_B8G8R8X8_UNORM:
4221             if (colorkey_active)
4222             {
4223                 *convert = CONVERT_RGB32_888;
4224                 format->glFormat = GL_RGBA;
4225                 format->glInternal = GL_RGBA8;
4226                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4227                 format->conv_byte_count = 4;
4228             }
4229             break;
4230
4231         default:
4232             break;
4233     }
4234
4235     return WINED3D_OK;
4236 }
4237
4238 void d3dfmt_p8_init_palette(struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4239 {
4240     struct wined3d_device *device = surface->resource.device;
4241     struct wined3d_palette *pal = surface->palette;
4242     BOOL index_in_alpha = FALSE;
4243     unsigned int i;
4244
4245     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4246      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4247      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4248      * duplicate entries. Store the color key in the unused alpha component to speed the
4249      * download up and to make conversion unneeded. */
4250     index_in_alpha = primary_render_target_is_p8(device);
4251
4252     if (!pal)
4253     {
4254         UINT dxVersion = device->wined3d->dxVersion;
4255
4256         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4257         if (dxVersion <= 7)
4258         {
4259             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4260             if (index_in_alpha)
4261             {
4262                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4263                  * there's no palette at this time. */
4264                 for (i = 0; i < 256; i++) table[i][3] = i;
4265             }
4266         }
4267         else
4268         {
4269             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4270              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4271              * capability flag is present (wine does advertise this capability) */
4272             for (i = 0; i < 256; ++i)
4273             {
4274                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4275                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4276                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4277                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4278             }
4279         }
4280     }
4281     else
4282     {
4283         TRACE("Using surface palette %p\n", pal);
4284         /* Get the surface's palette */
4285         for (i = 0; i < 256; ++i)
4286         {
4287             table[i][0] = pal->palents[i].peRed;
4288             table[i][1] = pal->palents[i].peGreen;
4289             table[i][2] = pal->palents[i].peBlue;
4290
4291             /* When index_in_alpha is set the palette index is stored in the
4292              * alpha component. In case of a readback we can then read
4293              * GL_ALPHA. Color keying is handled in BltOverride using a
4294              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4295              * color key itself is passed to glAlphaFunc in other cases the
4296              * alpha component of pixels that should be masked away is set to 0. */
4297             if (index_in_alpha)
4298             {
4299                 table[i][3] = i;
4300             }
4301             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4302                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4303             {
4304                 table[i][3] = 0x00;
4305             }
4306             else if (pal->flags & WINEDDPCAPS_ALPHA)
4307             {
4308                 table[i][3] = pal->palents[i].peFlags;
4309             }
4310             else
4311             {
4312                 table[i][3] = 0xFF;
4313             }
4314         }
4315     }
4316 }
4317
4318 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4319         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4320 {
4321     const BYTE *source;
4322     BYTE *dest;
4323     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4324
4325     switch (convert) {
4326         case NO_CONVERSION:
4327         {
4328             memcpy(dst, src, pitch * height);
4329             break;
4330         }
4331         case CONVERT_PALETTED:
4332         case CONVERT_PALETTED_CK:
4333         {
4334             BYTE table[256][4];
4335             unsigned int x, y;
4336
4337             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4338
4339             for (y = 0; y < height; y++)
4340             {
4341                 source = src + pitch * y;
4342                 dest = dst + outpitch * y;
4343                 /* This is an 1 bpp format, using the width here is fine */
4344                 for (x = 0; x < width; x++) {
4345                     BYTE color = *source++;
4346                     *dest++ = table[color][0];
4347                     *dest++ = table[color][1];
4348                     *dest++ = table[color][2];
4349                     *dest++ = table[color][3];
4350                 }
4351             }
4352         }
4353         break;
4354
4355         case CONVERT_CK_565:
4356         {
4357             /* Converting the 565 format in 5551 packed to emulate color-keying.
4358
4359               Note : in all these conversion, it would be best to average the averaging
4360                       pixels to get the color of the pixel that will be color-keyed to
4361                       prevent 'color bleeding'. This will be done later on if ever it is
4362                       too visible.
4363
4364               Note2: Nvidia documents say that their driver does not support alpha + color keying
4365                      on the same surface and disables color keying in such a case
4366             */
4367             unsigned int x, y;
4368             const WORD *Source;
4369             WORD *Dest;
4370
4371             TRACE("Color keyed 565\n");
4372
4373             for (y = 0; y < height; y++) {
4374                 Source = (const WORD *)(src + y * pitch);
4375                 Dest = (WORD *) (dst + y * outpitch);
4376                 for (x = 0; x < width; x++ ) {
4377                     WORD color = *Source++;
4378                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4379                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4380                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4381                         *Dest |= 0x0001;
4382                     Dest++;
4383                 }
4384             }
4385         }
4386         break;
4387
4388         case CONVERT_CK_5551:
4389         {
4390             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4391             unsigned int x, y;
4392             const WORD *Source;
4393             WORD *Dest;
4394             TRACE("Color keyed 5551\n");
4395             for (y = 0; y < height; y++) {
4396                 Source = (const WORD *)(src + y * pitch);
4397                 Dest = (WORD *) (dst + y * outpitch);
4398                 for (x = 0; x < width; x++ ) {
4399                     WORD color = *Source++;
4400                     *Dest = color;
4401                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4402                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4403                         *Dest |= (1 << 15);
4404                     else
4405                         *Dest &= ~(1 << 15);
4406                     Dest++;
4407                 }
4408             }
4409         }
4410         break;
4411
4412         case CONVERT_CK_RGB24:
4413         {
4414             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4415             unsigned int x, y;
4416             for (y = 0; y < height; y++)
4417             {
4418                 source = src + pitch * y;
4419                 dest = dst + outpitch * y;
4420                 for (x = 0; x < width; x++) {
4421                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4422                     DWORD dstcolor = color << 8;
4423                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4424                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4425                         dstcolor |= 0xff;
4426                     *(DWORD*)dest = dstcolor;
4427                     source += 3;
4428                     dest += 4;
4429                 }
4430             }
4431         }
4432         break;
4433
4434         case CONVERT_RGB32_888:
4435         {
4436             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4437             unsigned int x, y;
4438             for (y = 0; y < height; y++)
4439             {
4440                 source = src + pitch * y;
4441                 dest = dst + outpitch * y;
4442                 for (x = 0; x < width; x++) {
4443                     DWORD color = 0xffffff & *(const DWORD*)source;
4444                     DWORD dstcolor = color << 8;
4445                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4446                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4447                         dstcolor |= 0xff;
4448                     *(DWORD*)dest = dstcolor;
4449                     source += 4;
4450                     dest += 4;
4451                 }
4452             }
4453         }
4454         break;
4455
4456         default:
4457             ERR("Unsupported conversion type %#x.\n", convert);
4458     }
4459     return WINED3D_OK;
4460 }
4461
4462 BOOL palette9_changed(struct wined3d_surface *surface)
4463 {
4464     struct wined3d_device *device = surface->resource.device;
4465
4466     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4467             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4468     {
4469         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4470          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4471          */
4472         return FALSE;
4473     }
4474
4475     if (surface->palette9)
4476     {
4477         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4478         {
4479             return FALSE;
4480         }
4481     }
4482     else
4483     {
4484         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4485     }
4486     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4487
4488     return TRUE;
4489 }
4490
4491 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4492 {
4493     /* Flip the surface contents */
4494     /* Flip the DC */
4495     {
4496         HDC tmp;
4497         tmp = front->hDC;
4498         front->hDC = back->hDC;
4499         back->hDC = tmp;
4500     }
4501
4502     /* Flip the DIBsection */
4503     {
4504         HBITMAP tmp;
4505         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4506         tmp = front->dib.DIBsection;
4507         front->dib.DIBsection = back->dib.DIBsection;
4508         back->dib.DIBsection = tmp;
4509
4510         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4511         else front->flags &= ~SFLAG_DIBSECTION;
4512         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4513         else back->flags &= ~SFLAG_DIBSECTION;
4514     }
4515
4516     /* Flip the surface data */
4517     {
4518         void* tmp;
4519
4520         tmp = front->dib.bitmap_data;
4521         front->dib.bitmap_data = back->dib.bitmap_data;
4522         back->dib.bitmap_data = tmp;
4523
4524         tmp = front->resource.allocatedMemory;
4525         front->resource.allocatedMemory = back->resource.allocatedMemory;
4526         back->resource.allocatedMemory = tmp;
4527
4528         tmp = front->resource.heapMemory;
4529         front->resource.heapMemory = back->resource.heapMemory;
4530         back->resource.heapMemory = tmp;
4531     }
4532
4533     /* Flip the PBO */
4534     {
4535         GLuint tmp_pbo = front->pbo;
4536         front->pbo = back->pbo;
4537         back->pbo = tmp_pbo;
4538     }
4539
4540     /* client_memory should not be different, but just in case */
4541     {
4542         BOOL tmp;
4543         tmp = front->dib.client_memory;
4544         front->dib.client_memory = back->dib.client_memory;
4545         back->dib.client_memory = tmp;
4546     }
4547
4548     /* Flip the opengl texture */
4549     {
4550         GLuint tmp;
4551
4552         tmp = back->texture_name;
4553         back->texture_name = front->texture_name;
4554         front->texture_name = tmp;
4555
4556         tmp = back->texture_name_srgb;
4557         back->texture_name_srgb = front->texture_name_srgb;
4558         front->texture_name_srgb = tmp;
4559
4560         resource_unload(&back->resource);
4561         resource_unload(&front->resource);
4562     }
4563
4564     {
4565         DWORD tmp_flags = back->flags;
4566         back->flags = front->flags;
4567         front->flags = tmp_flags;
4568     }
4569 }
4570
4571 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4572  * pixel copy calls. */
4573 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4574         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4575 {
4576     struct wined3d_device *device = dst_surface->resource.device;
4577     float xrel, yrel;
4578     UINT row;
4579     struct wined3d_context *context;
4580     BOOL upsidedown = FALSE;
4581     RECT dst_rect = *dst_rect_in;
4582
4583     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4584      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4585      */
4586     if(dst_rect.top > dst_rect.bottom) {
4587         UINT tmp = dst_rect.bottom;
4588         dst_rect.bottom = dst_rect.top;
4589         dst_rect.top = tmp;
4590         upsidedown = TRUE;
4591     }
4592
4593     context = context_acquire(device, src_surface);
4594     context_apply_blit_state(context, device);
4595     surface_internal_preload(dst_surface, SRGB_RGB);
4596     ENTER_GL();
4597
4598     /* Bind the target texture */
4599     glBindTexture(dst_surface->texture_target, dst_surface->texture_name);
4600     checkGLcall("glBindTexture");
4601     if (surface_is_offscreen(src_surface))
4602     {
4603         TRACE("Reading from an offscreen target\n");
4604         upsidedown = !upsidedown;
4605         glReadBuffer(device->offscreenBuffer);
4606     }
4607     else
4608     {
4609         glReadBuffer(surface_get_gl_buffer(src_surface));
4610     }
4611     checkGLcall("glReadBuffer");
4612
4613     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4614     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4615
4616     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4617     {
4618         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4619
4620         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4621             ERR("Texture filtering not supported in direct blit\n");
4622         }
4623     }
4624     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4625             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4626     {
4627         ERR("Texture filtering not supported in direct blit\n");
4628     }
4629
4630     if (upsidedown
4631             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4632             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4633     {
4634         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4635
4636         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4637                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4638                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4639                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4640     }
4641     else
4642     {
4643         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4644         /* I have to process this row by row to swap the image,
4645          * otherwise it would be upside down, so stretching in y direction
4646          * doesn't cost extra time
4647          *
4648          * However, stretching in x direction can be avoided if not necessary
4649          */
4650         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4651             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4652             {
4653                 /* Well, that stuff works, but it's very slow.
4654                  * find a better way instead
4655                  */
4656                 UINT col;
4657
4658                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4659                 {
4660                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4661                             dst_rect.left + col /* x offset */, row /* y offset */,
4662                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4663                 }
4664             }
4665             else
4666             {
4667                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4668                         dst_rect.left /* x offset */, row /* y offset */,
4669                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4670             }
4671         }
4672     }
4673     checkGLcall("glCopyTexSubImage2D");
4674
4675     LEAVE_GL();
4676     context_release(context);
4677
4678     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4679      * path is never entered
4680      */
4681     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4682 }
4683
4684 /* Uses the hardware to stretch and flip the image */
4685 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4686         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4687 {
4688     struct wined3d_device *device = dst_surface->resource.device;
4689     struct wined3d_swapchain *src_swapchain = NULL;
4690     GLuint src, backup = 0;
4691     float left, right, top, bottom; /* Texture coordinates */
4692     UINT fbwidth = src_surface->resource.width;
4693     UINT fbheight = src_surface->resource.height;
4694     struct wined3d_context *context;
4695     GLenum drawBuffer = GL_BACK;
4696     GLenum texture_target;
4697     BOOL noBackBufferBackup;
4698     BOOL src_offscreen;
4699     BOOL upsidedown = FALSE;
4700     RECT dst_rect = *dst_rect_in;
4701
4702     TRACE("Using hwstretch blit\n");
4703     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4704     context = context_acquire(device, src_surface);
4705     context_apply_blit_state(context, device);
4706     surface_internal_preload(dst_surface, SRGB_RGB);
4707
4708     src_offscreen = surface_is_offscreen(src_surface);
4709     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4710     if (!noBackBufferBackup && !src_surface->texture_name)
4711     {
4712         /* Get it a description */
4713         surface_internal_preload(src_surface, SRGB_RGB);
4714     }
4715     ENTER_GL();
4716
4717     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4718      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4719      */
4720     if (context->aux_buffers >= 2)
4721     {
4722         /* Got more than one aux buffer? Use the 2nd aux buffer */
4723         drawBuffer = GL_AUX1;
4724     }
4725     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4726     {
4727         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4728         drawBuffer = GL_AUX0;
4729     }
4730
4731     if(noBackBufferBackup) {
4732         glGenTextures(1, &backup);
4733         checkGLcall("glGenTextures");
4734         glBindTexture(GL_TEXTURE_2D, backup);
4735         checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4736         texture_target = GL_TEXTURE_2D;
4737     } else {
4738         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4739          * we are reading from the back buffer, the backup can be used as source texture
4740          */
4741         texture_target = src_surface->texture_target;
4742         glBindTexture(texture_target, src_surface->texture_name);
4743         checkGLcall("glBindTexture(texture_target, src_surface->texture_name)");
4744         glEnable(texture_target);
4745         checkGLcall("glEnable(texture_target)");
4746
4747         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4748         src_surface->flags &= ~SFLAG_INTEXTURE;
4749     }
4750
4751     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4752      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4753      */
4754     if(dst_rect.top > dst_rect.bottom) {
4755         UINT tmp = dst_rect.bottom;
4756         dst_rect.bottom = dst_rect.top;
4757         dst_rect.top = tmp;
4758         upsidedown = TRUE;
4759     }
4760
4761     if (src_offscreen)
4762     {
4763         TRACE("Reading from an offscreen target\n");
4764         upsidedown = !upsidedown;
4765         glReadBuffer(device->offscreenBuffer);
4766     }
4767     else
4768     {
4769         glReadBuffer(surface_get_gl_buffer(src_surface));
4770     }
4771
4772     /* TODO: Only back up the part that will be overwritten */
4773     glCopyTexSubImage2D(texture_target, 0,
4774                         0, 0 /* read offsets */,
4775                         0, 0,
4776                         fbwidth,
4777                         fbheight);
4778
4779     checkGLcall("glCopyTexSubImage2D");
4780
4781     /* No issue with overriding these - the sampler is dirty due to blit usage */
4782     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4783             wined3d_gl_mag_filter(magLookup, Filter));
4784     checkGLcall("glTexParameteri");
4785     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
4786             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
4787     checkGLcall("glTexParameteri");
4788
4789     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4790         src_swapchain = src_surface->container.u.swapchain;
4791     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
4792     {
4793         src = backup ? backup : src_surface->texture_name;
4794     }
4795     else
4796     {
4797         glReadBuffer(GL_FRONT);
4798         checkGLcall("glReadBuffer(GL_FRONT)");
4799
4800         glGenTextures(1, &src);
4801         checkGLcall("glGenTextures(1, &src)");
4802         glBindTexture(GL_TEXTURE_2D, src);
4803         checkGLcall("glBindTexture(GL_TEXTURE_2D, src)");
4804
4805         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
4806          * out for power of 2 sizes
4807          */
4808         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
4809                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
4810         checkGLcall("glTexImage2D");
4811         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
4812                             0, 0 /* read offsets */,
4813                             0, 0,
4814                             fbwidth,
4815                             fbheight);
4816
4817         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
4818         checkGLcall("glTexParameteri");
4819         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
4820         checkGLcall("glTexParameteri");
4821
4822         glReadBuffer(GL_BACK);
4823         checkGLcall("glReadBuffer(GL_BACK)");
4824
4825         if(texture_target != GL_TEXTURE_2D) {
4826             glDisable(texture_target);
4827             glEnable(GL_TEXTURE_2D);
4828             texture_target = GL_TEXTURE_2D;
4829         }
4830     }
4831     checkGLcall("glEnd and previous");
4832
4833     left = src_rect->left;
4834     right = src_rect->right;
4835
4836     if (!upsidedown)
4837     {
4838         top = src_surface->resource.height - src_rect->top;
4839         bottom = src_surface->resource.height - src_rect->bottom;
4840     }
4841     else
4842     {
4843         top = src_surface->resource.height - src_rect->bottom;
4844         bottom = src_surface->resource.height - src_rect->top;
4845     }
4846
4847     if (src_surface->flags & SFLAG_NORMCOORD)
4848     {
4849         left /= src_surface->pow2Width;
4850         right /= src_surface->pow2Width;
4851         top /= src_surface->pow2Height;
4852         bottom /= src_surface->pow2Height;
4853     }
4854
4855     /* draw the source texture stretched and upside down. The correct surface is bound already */
4856     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
4857     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
4858
4859     context_set_draw_buffer(context, drawBuffer);
4860     glReadBuffer(drawBuffer);
4861
4862     glBegin(GL_QUADS);
4863         /* bottom left */
4864         glTexCoord2f(left, bottom);
4865         glVertex2i(0, 0);
4866
4867         /* top left */
4868         glTexCoord2f(left, top);
4869         glVertex2i(0, dst_rect.bottom - dst_rect.top);
4870
4871         /* top right */
4872         glTexCoord2f(right, top);
4873         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4874
4875         /* bottom right */
4876         glTexCoord2f(right, bottom);
4877         glVertex2i(dst_rect.right - dst_rect.left, 0);
4878     glEnd();
4879     checkGLcall("glEnd and previous");
4880
4881     if (texture_target != dst_surface->texture_target)
4882     {
4883         glDisable(texture_target);
4884         glEnable(dst_surface->texture_target);
4885         texture_target = dst_surface->texture_target;
4886     }
4887
4888     /* Now read the stretched and upside down image into the destination texture */
4889     glBindTexture(texture_target, dst_surface->texture_name);
4890     checkGLcall("glBindTexture");
4891     glCopyTexSubImage2D(texture_target,
4892                         0,
4893                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
4894                         0, 0, /* We blitted the image to the origin */
4895                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4896     checkGLcall("glCopyTexSubImage2D");
4897
4898     if(drawBuffer == GL_BACK) {
4899         /* Write the back buffer backup back */
4900         if(backup) {
4901             if(texture_target != GL_TEXTURE_2D) {
4902                 glDisable(texture_target);
4903                 glEnable(GL_TEXTURE_2D);
4904                 texture_target = GL_TEXTURE_2D;
4905             }
4906             glBindTexture(GL_TEXTURE_2D, backup);
4907             checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4908         }
4909         else
4910         {
4911             if (texture_target != src_surface->texture_target)
4912             {
4913                 glDisable(texture_target);
4914                 glEnable(src_surface->texture_target);
4915                 texture_target = src_surface->texture_target;
4916             }
4917             glBindTexture(src_surface->texture_target, src_surface->texture_name);
4918             checkGLcall("glBindTexture(src_surface->texture_target, src_surface->texture_name)");
4919         }
4920
4921         glBegin(GL_QUADS);
4922             /* top left */
4923             glTexCoord2f(0.0f, 0.0f);
4924             glVertex2i(0, fbheight);
4925
4926             /* bottom left */
4927             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
4928             glVertex2i(0, 0);
4929
4930             /* bottom right */
4931             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
4932                     (float)fbheight / (float)src_surface->pow2Height);
4933             glVertex2i(fbwidth, 0);
4934
4935             /* top right */
4936             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
4937             glVertex2i(fbwidth, fbheight);
4938         glEnd();
4939     }
4940     glDisable(texture_target);
4941     checkGLcall("glDisable(texture_target)");
4942
4943     /* Cleanup */
4944     if (src != src_surface->texture_name && src != backup)
4945     {
4946         glDeleteTextures(1, &src);
4947         checkGLcall("glDeleteTextures(1, &src)");
4948     }
4949     if(backup) {
4950         glDeleteTextures(1, &backup);
4951         checkGLcall("glDeleteTextures(1, &backup)");
4952     }
4953
4954     LEAVE_GL();
4955
4956     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
4957
4958     context_release(context);
4959
4960     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4961      * path is never entered
4962      */
4963     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4964 }
4965
4966 /* Translate "rect" from D3D coordinates (top-left origin) to GL drawable
4967  * coordinates (bottom-left origin). Front buffer rectangles are given in
4968  * full screen coordinates while the GL drawable only covers the client area
4969  * of "window", so those are moved into client space before being flipped. */
4970 void surface_translate_drawable_coords(struct wined3d_surface *surface, HWND window, RECT *rect)
4971 {
4972     UINT flip_height = surface->resource.height;
4973
4974     /* Only a swapchain's front buffer needs the screen -> client adjustment;
4975      * any other surface is flipped around its own height. */
4976     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4977             && surface->container.u.swapchain->front_buffer == surface)
4978     {
4979         RECT client_rect;
4980         POINT origin;
4981
4982         origin.x = 0;
4983         origin.y = 0;
4984         ScreenToClient(window, &origin);
4985         OffsetRect(rect, origin.x, origin.y);
4986
4987         GetClientRect(window, &client_rect);
4988         flip_height = client_rect.bottom - client_rect.top;
4989     }
4990
4991     rect->top = flip_height - rect->top;
4992     rect->bottom = flip_height - rect->bottom;
4993 }
4994
4995 /* Blit between surface locations using glBlitFramebuffer(). Onscreen on different swapchains is not supported.
4996  * Depth / stencil is not supported; only GL_COLOR_BUFFER_BIT is blitted.
      *
      * WINED3DTEXF_LINEAR maps to GL_LINEAR; WINED3DTEXF_NONE and WINED3DTEXF_POINT map to GL_NEAREST. Any other
      * filter is reported with a FIXME and handled as GL_NEAREST. The caller's rectangles are copied and never
      * modified. If the acquired GL context is not valid the blit is skipped with a WARN. */
4997 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
4998         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
4999         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
5000 {
5001     const struct wined3d_gl_info *gl_info;
5002     struct wined3d_context *context;
5003     RECT src_rect, dst_rect;
5004     GLenum gl_filter;
5005
5006     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
5007     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
5008             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
5009     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
5010             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
5011
         /* Work on local copies; the rectangles may be translated in place below. */
5012     src_rect = *src_rect_in;
5013     dst_rect = *dst_rect_in;
5014
5015     switch (filter)
5016     {
5017         case WINED3DTEXF_LINEAR:
5018             gl_filter = GL_LINEAR;
5019             break;
5020
5021         default:
5022             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
                 /* Fall through: unknown filters are handled like point filtering. */
5023         case WINED3DTEXF_NONE:
5024         case WINED3DTEXF_POINT:
5025             gl_filter = GL_NEAREST;
5026             break;
5027     }
5028
         /* For offscreen surfaces a request for the drawable location is
          * redirected to the texture location. */
5029     if (src_location == SFLAG_INDRAWABLE && surface_is_offscreen(src_surface))
5030         src_location = SFLAG_INTEXTURE;
5031     if (dst_location == SFLAG_INDRAWABLE && surface_is_offscreen(dst_surface))
5032         dst_location = SFLAG_INTEXTURE;
5033
5034     /* Make sure the locations are up-to-date. Loading the destination
5035      * surface isn't required if the entire surface is overwritten. (And is
5036      * in fact harmful if we're being called by surface_load_location() with
5037      * the purpose of loading the destination surface.) */
5038     surface_load_location(src_surface, src_location, NULL);
5039     if (!surface_is_full_rect(dst_surface, &dst_rect))
5040         surface_load_location(dst_surface, dst_location, NULL);
5041
         /* Acquire the context for whichever surface is accessed through its
          * drawable, preferring the source; otherwise no particular target. */
5042     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
5043     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
5044     else context = context_acquire(device, NULL);
5045
5046     if (!context->valid)
5047     {
5048         context_release(context);
5049         WARN("Invalid context, skipping blit.\n");
5050         return;
5051     }
5052
5053     gl_info = context->gl_info;
5054
         /* Set up the read side. ENTER_GL() is issued in either branch and the
          * matching LEAVE_GL() follows the framebuffer status check below. */
5055     if (src_location == SFLAG_INDRAWABLE)
5056     {
5057         GLenum buffer = surface_get_gl_buffer(src_surface);
5058
5059         TRACE("Source surface %p is onscreen.\n", src_surface);
5060
             /* Convert the D3D rectangle to GL drawable coordinates. */
5061         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
5062
5063         ENTER_GL();
             /* NOTE(review): a NULL FBO presumably selects the window-system
              * framebuffer for reading - confirm in context_bind_fbo(). */
5064         context_bind_fbo(context, GL_READ_FRAMEBUFFER, NULL);
5065         glReadBuffer(buffer);
5066         checkGLcall("glReadBuffer()");
5067     }
5068     else
5069     {
5070         TRACE("Source surface %p is offscreen.\n", src_surface);
5071         ENTER_GL();
5072         context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
5073         glReadBuffer(GL_COLOR_ATTACHMENT0);
5074         checkGLcall("glReadBuffer()");
5075     }
5076     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
5077     LEAVE_GL();
5078
         /* Set up the draw side in the same way. The GL lock taken here is held
          * until after glBlitFramebuffer() below. */
5079     if (dst_location == SFLAG_INDRAWABLE)
5080     {
5081         GLenum buffer = surface_get_gl_buffer(dst_surface);
5082
5083         TRACE("Destination surface %p is onscreen.\n", dst_surface);
5084
5085         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5086
5087         ENTER_GL();
5088         context_bind_fbo(context, GL_DRAW_FRAMEBUFFER, NULL);
5089         context_set_draw_buffer(context, buffer);
5090     }
5091     else
5092     {
5093         TRACE("Destination surface %p is offscreen.\n", dst_surface);
5094
5095         ENTER_GL();
5096         context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
5097         context_set_draw_buffer(context, GL_COLOR_ATTACHMENT0);
5098     }
5099     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
5100
         /* Enable all color channels and disable scissoring for the blit. The
          * matching render states are marked dirty, presumably so the device
          * re-applies its own values afterwards. */
5101     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
5102     IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
5103     IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
5104     IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
5105     IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
5106
5107     glDisable(GL_SCISSOR_TEST);
5108     IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
5109
5110     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
5111             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
5112     checkGLcall("glBlitFramebuffer()");
5113
5114     LEAVE_GL();
5115
         /* Flush when strict draw ordering is requested, or when the blit went
          * to the drawable of the swapchain's front buffer. */
5116     if (wined3d_settings.strict_draw_ordering
5117             || (dst_location == SFLAG_INDRAWABLE
5118             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
5119         wglFlush();
5120
5121     context_release(context);
5122 }
5123
/* Draw src_rect of src_surface into dst_rect of dst_surface with
 * draw_textured_quad(), using the GL context acquired for dst_surface and the
 * shader set up by the device's blitter. A non-zero color_key enables alpha
 * testing for the draw, so that fragments whose alpha equals the reference
 * value passed to glAlphaFunc() are discarded. */
5124 static void surface_blt_to_drawable(struct wined3d_device *device,
5125         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5126         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5127         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5128 {
5129     struct wined3d_context *context;
5130     RECT src_rect, dst_rect;
5131
         /* Work on local copies; dst_rect may be translated in place below. */
5132     src_rect = *src_rect_in;
5133     dst_rect = *dst_rect_in;
5134
5135     /* Make sure the surface is up-to-date. This should probably use
5136      * surface_load_location() and worry about the destination surface too,
5137      * unless we're overwriting it completely. */
5138     surface_internal_preload(src_surface, SRGB_RGB);
5139
5140     /* Activate the destination context, set it up for blitting */
5141     context = context_acquire(device, dst_surface);
5142     context_apply_blit_state(context, device);
5143
         /* Only onscreen destinations need the D3D -> GL drawable coordinate
          * conversion. */
5144     if (!surface_is_offscreen(dst_surface))
5145         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5146
5147     device->blitter->set_shader(device->blit_priv, context->gl_info, src_surface);
5148
5149     ENTER_GL();
5150
5151     if (color_key)
5152     {
5153         glEnable(GL_ALPHA_TEST);
5154         checkGLcall("glEnable(GL_ALPHA_TEST)");
5155
5156         /* When the primary render target uses P8, the alpha component
5157          * contains the palette index. Which means that the colorkey is one of
5158          * the palette entries. In other cases pixels that should be masked
5159          * away have alpha set to 0. */
5160         if (primary_render_target_is_p8(device))
5161             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5162         else
5163             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5164         checkGLcall("glAlphaFunc");
5165     }
5166     else
5167     {
5168         glDisable(GL_ALPHA_TEST);
5169         checkGLcall("glDisable(GL_ALPHA_TEST)");
5170     }
5171
5172     draw_textured_quad(src_surface, &src_rect, &dst_rect, filter);
5173
         /* Alpha testing was only enabled for this draw; switch it off again. */
5174     if (color_key)
5175     {
5176         glDisable(GL_ALPHA_TEST);
5177         checkGLcall("glDisable(GL_ALPHA_TEST)");
5178     }
5179
5180     LEAVE_GL();
5181
5182     /* Leave the opengl state valid for blitting */
5183     device->blitter->unset_shader(context->gl_info);
5184
         /* Flush when strict draw ordering is requested, or when the
          * destination is the front buffer of its swapchain. */
5185     if (wined3d_settings.strict_draw_ordering
5186             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5187             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5188         wglFlush(); /* Flush to ensure ordering across contexts. */
5189
5190     context_release(context);
5191 }
5192
5193 /* Fill "rect" of "s" with "color" through whichever blitter
5194  * wined3d_select_blitter() picks. Do not call while under the GL lock. */
5195 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5196 {
5197     struct wined3d_device *dev = s->resource.device;
5198     const struct blit_shader *fill_blitter = wined3d_select_blitter(&dev->adapter->gl_info,
5199             WINED3D_BLIT_OP_COLOR_FILL, NULL, 0, 0, NULL, rect,
5200             s->resource.usage, s->resource.pool, s->resource.format);
5201
5202     if (fill_blitter)
5203         return fill_blitter->color_fill(dev, s, rect, color);
5204
5205     /* No blitter can handle the fill; report it and fail the call. */
5206     FIXME("No blitter is capable of performing the requested color fill operation.\n");
5207     return WINED3DERR_INVALIDCALL;
5208 }
5209
5210 /* Do not call while under the GL lock. */
5211 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *DestRect,
5212         struct wined3d_surface *src_surface, const RECT *SrcRect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5213         WINED3DTEXTUREFILTERTYPE Filter)
5214 {
5215     struct wined3d_device *device = dst_surface->resource.device;
5216     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5217     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5218     RECT dst_rect, src_rect;
5219
5220     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5221             dst_surface, wine_dbgstr_rect(DestRect), src_surface, wine_dbgstr_rect(SrcRect),
5222             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5223
5224     /* Get the swapchain. One of the surfaces has to be a primary surface */
5225     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5226     {
5227         WARN("Destination is in sysmem, rejecting gl blt\n");
5228         return WINED3DERR_INVALIDCALL;
5229     }
5230
5231     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5232         dstSwapchain = dst_surface->container.u.swapchain;
5233
5234     if (src_surface)
5235     {
5236         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5237         {
5238             WARN("Src is in sysmem, rejecting gl blt\n");
5239             return WINED3DERR_INVALIDCALL;
5240         }
5241
5242         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5243             srcSwapchain = src_surface->container.u.swapchain;
5244     }
5245
5246     /* Early sort out of cases where no render target is used */
5247     if (!dstSwapchain && !srcSwapchain
5248             && src_surface != device->render_targets[0]
5249             && dst_surface != device->render_targets[0])
5250     {
5251         TRACE("No surface is render target, not using hardware blit.\n");
5252         return WINED3DERR_INVALIDCALL;
5253     }
5254
5255     /* No destination color keying supported */
5256     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5257     {
5258         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5259         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5260         return WINED3DERR_INVALIDCALL;
5261     }
5262
5263     surface_get_rect(dst_surface, DestRect, &dst_rect);
5264     if (src_surface) surface_get_rect(src_surface, SrcRect, &src_rect);
5265
5266     /* The only case where both surfaces on a swapchain are supported is a back buffer -> front buffer blit on the same swapchain */
5267     if (dstSwapchain && dstSwapchain == srcSwapchain && dstSwapchain->back_buffers
5268             && dst_surface == dstSwapchain->front_buffer
5269             && src_surface == dstSwapchain->back_buffers[0])
5270     {
5271         /* Half-Life does a Blt from the back buffer to the front buffer,
5272          * Full surface size, no flags... Use present instead
5273          *
5274          * This path will only be entered for d3d7 and ddraw apps, because d3d8/9 offer no way to blit TO the front buffer
5275          */
5276
5277         /* Check rects - IWineD3DDevice_Present doesn't handle them */
5278         while(1)
5279         {
5280             TRACE("Looking if a Present can be done...\n");
5281             /* Source Rectangle must be full surface */
5282             if (src_rect.left || src_rect.top
5283                     || src_rect.right != src_surface->resource.width
5284                     || src_rect.bottom != src_surface->resource.height)
5285             {
5286                 TRACE("No, Source rectangle doesn't match\n");
5287                 break;
5288             }
5289
5290             /* No stretching may occur */
5291             if(src_rect.right != dst_rect.right - dst_rect.left ||
5292                src_rect.bottom != dst_rect.bottom - dst_rect.top) {
5293                 TRACE("No, stretching is done\n");
5294                 break;
5295             }
5296
5297             /* Destination must be full surface or match the clipping rectangle */
5298             if (dst_surface->clipper && dst_surface->clipper->hWnd)
5299             {
5300                 RECT cliprect;
5301                 POINT pos[2];
5302                 GetClientRect(dst_surface->clipper->hWnd, &cliprect);
5303                 pos[0].x = dst_rect.left;
5304                 pos[0].y = dst_rect.top;
5305                 pos[1].x = dst_rect.right;
5306                 pos[1].y = dst_rect.bottom;
5307                 MapWindowPoints(GetDesktopWindow(), dst_surface->clipper->hWnd, pos, 2);
5308
5309                 if(pos[0].x != cliprect.left  || pos[0].y != cliprect.top   ||
5310                    pos[1].x != cliprect.right || pos[1].y != cliprect.bottom)
5311                 {
5312                     TRACE("No, dest rectangle doesn't match(clipper)\n");
5313                     TRACE("Clip rect at %s\n", wine_dbgstr_rect(&cliprect));
5314                     TRACE("Blt dest: %s\n", wine_dbgstr_rect(&dst_rect));
5315                     break;
5316                 }
5317             }
5318             else if (dst_rect.left || dst_rect.top
5319                     || dst_rect.right != dst_surface->resource.width
5320                     || dst_rect.bottom != dst_surface->resource.height)
5321             {
5322                 TRACE("No, dest rectangle doesn't match(surface size)\n");
5323                 break;
5324             }
5325
5326             TRACE("Yes\n");
5327
5328             /* These flags are unimportant for the flag check, remove them */
5329             if (!(flags & ~(WINEDDBLT_DONOTWAIT | WINEDDBLT_WAIT)))
5330             {
5331                 WINED3DSWAPEFFECT orig_swap = dstSwapchain->presentParms.SwapEffect;
5332
5333                 /* The idea behind this is that a glReadPixels and a glDrawPixels call
5334                     * take very long, while a flip is fast.
5335                     * This applies to Half-Life, which does such Blts every time it finished
5336                     * a frame, and to Prince of Persia 3D, which uses this to draw at least the main
5337                     * menu. This is also used by all apps when they do windowed rendering
5338                     *
5339                     * The problem is that flipping is not really the same as copying. After a
5340                     * Blt the front buffer is a copy of the back buffer, and the back buffer is
5341                     * untouched. Therefore it's necessary to override the swap effect
5342                     * and to set it back after the flip.
5343                     *
5344                     * Windowed Direct3D < 7 apps do the same. The D3D7 sdk demos are nice
5345                     * testcases.
5346                     */
5347
5348                 dstSwapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
5349                 dstSwapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
5350
5351                 TRACE("Full screen back buffer -> front buffer blt, performing a flip instead.\n");
5352                 wined3d_swapchain_present(dstSwapchain, NULL, NULL, dstSwapchain->win_handle, NULL, 0);
5353
5354                 dstSwapchain->presentParms.SwapEffect = orig_swap;
5355
5356                 return WINED3D_OK;
5357             }
5358             break;
5359         }
5360
5361         TRACE("Unsupported blit between buffers on the same swapchain\n");
5362         return WINED3DERR_INVALIDCALL;
5363     } else if(dstSwapchain && dstSwapchain == srcSwapchain) {
5364         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5365         return WINED3DERR_INVALIDCALL;
5366     } else if(dstSwapchain && srcSwapchain) {
5367         FIXME("Implement hardware blit between two different swapchains\n");
5368         return WINED3DERR_INVALIDCALL;
5369     }
5370     else if (dstSwapchain)
5371     {
5372         /* Handled with regular texture -> swapchain blit */
5373         if (src_surface == device->render_targets[0])
5374             TRACE("Blit from active render target to a swapchain\n");
5375     }
5376     else if (srcSwapchain && dst_surface == device->render_targets[0])
5377     {
5378         FIXME("Implement blit from a swapchain to the active render target\n");
5379         return WINED3DERR_INVALIDCALL;
5380     }
5381
5382     if ((srcSwapchain || src_surface == device->render_targets[0]) && !dstSwapchain)
5383     {
5384         /* Blit from render target to texture */
5385         BOOL stretchx;
5386
5387         /* P8 read back is not implemented */
5388         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5389                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5390         {
5391             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5392             return WINED3DERR_INVALIDCALL;
5393         }
5394
5395         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5396         {
5397             TRACE("Color keying not supported by frame buffer to texture blit\n");
5398             return WINED3DERR_INVALIDCALL;
5399             /* Destination color key is checked above */
5400         }
5401
5402         if(dst_rect.right - dst_rect.left != src_rect.right - src_rect.left) {
5403             stretchx = TRUE;
5404         } else {
5405             stretchx = FALSE;
5406         }
5407
5408         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5409          * flip the image nor scale it.
5410          *
5411          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5412          * -> If the app wants a image width an unscaled width, copy it line per line
5413          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5414          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5415          *    back buffer. This is slower than reading line per line, thus not used for flipping
5416          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5417          *    pixel by pixel
5418          *
5419          * If EXT_framebuffer_blit is supported that can be used instead. Note that EXT_framebuffer_blit implies
5420          * FBO support, so it doesn't really make sense to try and make it work with different offscreen rendering
5421          * backends. */
5422         if (fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5423                 &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5424                 &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5425         {
5426             surface_blt_fbo(device, Filter,
5427                     src_surface, SFLAG_INDRAWABLE, &src_rect,
5428                     dst_surface, SFLAG_INDRAWABLE, &dst_rect);
5429             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5430         }
5431         else if (!stretchx || dst_rect.right - dst_rect.left > src_surface->resource.width
5432                 || dst_rect.bottom - dst_rect.top > src_surface->resource.height)
5433         {
5434             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5435             fb_copy_to_texture_direct(dst_surface, src_surface, &src_rect, &dst_rect, Filter);
5436         } else {
5437             TRACE("Using hardware stretching to flip / stretch the texture\n");
5438             fb_copy_to_texture_hwstretch(dst_surface, src_surface, &src_rect, &dst_rect, Filter);
5439         }
5440
5441         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5442         {
5443             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5444             dst_surface->resource.allocatedMemory = NULL;
5445             dst_surface->resource.heapMemory = NULL;
5446         }
5447         else
5448         {
5449             dst_surface->flags &= ~SFLAG_INSYSMEM;
5450         }
5451
5452         return WINED3D_OK;
5453     }
5454     else if (src_surface)
5455     {
5456         /* Blit from offscreen surface to render target */
5457         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5458         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5459
5460         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5461
5462         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5463                 && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5464                         &src_rect, src_surface->resource.usage, src_surface->resource.pool,
5465                         src_surface->resource.format,
5466                         &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5467                         dst_surface->resource.format))
5468         {
5469             TRACE("Using surface_blt_fbo.\n");
5470             /* The source is always a texture, but never the currently active render target, and the texture
5471              * contents are never upside down. */
5472             surface_blt_fbo(device, Filter,
5473                     src_surface, SFLAG_INDRAWABLE, &src_rect,
5474                     dst_surface, SFLAG_INDRAWABLE, &dst_rect);
5475             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5476             return WINED3D_OK;
5477         }
5478
5479         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5480                 && arbfp_blit.blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5481                         &src_rect, src_surface->resource.usage, src_surface->resource.pool,
5482                         src_surface->resource.format,
5483                         &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5484                         dst_surface->resource.format))
5485         {
5486             return arbfp_blit_surface(device, src_surface, &src_rect, dst_surface, &dst_rect,
5487                     WINED3D_BLIT_OP_COLOR_BLIT, Filter);
5488         }
5489
5490         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5491                 &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5492                 &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5493         {
5494             FIXME("Unsupported blit operation falling back to software\n");
5495             return WINED3DERR_INVALIDCALL;
5496         }
5497
5498         /* Color keying: Check if we have to do a color keyed blt,
5499          * and if not check if a color key is activated.
5500          *
5501          * Just modify the color keying parameters in the surface and restore them afterwards
5502          * The surface keeps track of the color key last used to load the opengl surface.
5503          * PreLoad will catch the change to the flags and color key and reload if necessary.
5504          */
5505         if (flags & WINEDDBLT_KEYSRC)
5506         {
5507             /* Use color key from surface */
5508         }
5509         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5510         {
5511             /* Use color key from DDBltFx */
5512             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5513             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5514         }
5515         else
5516         {
5517             /* Do not use color key */
5518             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5519         }
5520
5521         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5522                 src_surface, &src_rect, dst_surface, &dst_rect);
5523
5524         /* Restore the color key parameters */
5525         src_surface->CKeyFlags = oldCKeyFlags;
5526         src_surface->SrcBltCKey = oldBltCKey;
5527
5528         surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5529
5530         return WINED3D_OK;
5531     }
5532     else
5533     {
5534         /* Source-Less Blit to render target */
5535         if (flags & WINEDDBLT_COLORFILL)
5536         {
5537             WINED3DCOLORVALUE color;
5538
5539             TRACE("Colorfill\n");
5540
5541             /* The color as given in the Blt function is in the surface format. */
5542             if (!surface_convert_color_to_float(dst_surface, DDBltFx->u5.dwFillColor, &color))
5543                 return WINED3DERR_INVALIDCALL;
5544
5545             return surface_color_fill(dst_surface, &dst_rect, &color);
5546         }
5547     }
5548
5549     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5550     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5551     return WINED3DERR_INVALIDCALL;
5552 }
5553
5554 /* GL locking is done by the caller */
5555 static void surface_depth_blt(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
5556         GLuint texture, GLsizei w, GLsizei h, GLenum target)
5557 {
5558     struct wined3d_device *device = surface->resource.device;
5559     GLint compare_mode = GL_NONE;
5560     struct blt_info info;
5561     GLint old_binding = 0;
5562     RECT rect;
5563
5564     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5565
5566     glDisable(GL_CULL_FACE);
5567     glDisable(GL_BLEND);
5568     glDisable(GL_ALPHA_TEST);
5569     glDisable(GL_SCISSOR_TEST);
5570     glDisable(GL_STENCIL_TEST);
5571     glEnable(GL_DEPTH_TEST);
5572     glDepthFunc(GL_ALWAYS);
5573     glDepthMask(GL_TRUE);
5574     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5575     glViewport(0, surface->pow2Height - h, w, h);
5576
5577     SetRect(&rect, 0, h, w, 0);
5578     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5579     GL_EXTCALL(glActiveTextureARB(GL_TEXTURE0_ARB));
5580     glGetIntegerv(info.binding, &old_binding);
5581     glBindTexture(info.bind_target, texture);
5582     if (gl_info->supported[ARB_SHADOW])
5583     {
5584         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5585         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5586     }
5587
5588     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5589             gl_info, info.tex_type, &surface->ds_current_size);
5590
5591     glBegin(GL_TRIANGLE_STRIP);
5592     glTexCoord3fv(info.coords[0]);
5593     glVertex2f(-1.0f, -1.0f);
5594     glTexCoord3fv(info.coords[1]);
5595     glVertex2f(1.0f, -1.0f);
5596     glTexCoord3fv(info.coords[2]);
5597     glVertex2f(-1.0f, 1.0f);
5598     glTexCoord3fv(info.coords[3]);
5599     glVertex2f(1.0f, 1.0f);
5600     glEnd();
5601
5602     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5603     glBindTexture(info.bind_target, old_binding);
5604
5605     glPopAttrib();
5606
5607     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5608 }
5609
5610 void surface_modify_ds_location(struct wined3d_surface *surface,
5611         DWORD location, UINT w, UINT h)
5612 {
5613     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5614
5615     if (location & ~SFLAG_DS_LOCATIONS)
5616         FIXME("Invalid location (%#x) specified.\n", location);
5617
5618     surface->ds_current_size.cx = w;
5619     surface->ds_current_size.cy = h;
5620     surface->flags &= ~SFLAG_DS_LOCATIONS;
5621     surface->flags |= location;
5622 }
5623
5624 /* Context activation is done by the caller. */
5625 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5626 {
5627     struct wined3d_device *device = surface->resource.device;
5628     const struct wined3d_gl_info *gl_info = context->gl_info;
5629     GLsizei w, h;
5630
5631     TRACE("surface %p, new location %#x.\n", surface, location);
5632
5633     /* TODO: Make this work for modes other than FBO */
5634     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5635
5636     if (!(surface->flags & location))
5637     {
5638         w = surface->ds_current_size.cx;
5639         h = surface->ds_current_size.cy;
5640         surface->ds_current_size.cx = 0;
5641         surface->ds_current_size.cy = 0;
5642     }
5643     else
5644     {
5645         w = surface->resource.width;
5646         h = surface->resource.height;
5647     }
5648
5649     if (surface->ds_current_size.cx == surface->resource.width
5650             && surface->ds_current_size.cy == surface->resource.height)
5651     {
5652         TRACE("Location (%#x) is already up to date.\n", location);
5653         return;
5654     }
5655
5656     if (surface->current_renderbuffer)
5657     {
5658         FIXME("Not supported with fixed up depth stencil.\n");
5659         return;
5660     }
5661
5662     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5663     {
5664         /* This mostly happens when a depth / stencil is used without being
5665          * cleared first. In principle we could upload from sysmem, or
5666          * explicitly clear before first usage. For the moment there don't
5667          * appear to be a lot of applications depending on this, so a FIXME
5668          * should do. */
5669         FIXME("No up to date depth stencil location.\n");
5670         surface->flags |= location;
5671         surface->ds_current_size.cx = surface->resource.width;
5672         surface->ds_current_size.cy = surface->resource.height;
5673         return;
5674     }
5675
5676     if (location == SFLAG_DS_OFFSCREEN)
5677     {
5678         GLint old_binding = 0;
5679         GLenum bind_target;
5680
5681         /* The render target is allowed to be smaller than the depth/stencil
5682          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5683          * than the offscreen surface. Don't overwrite the offscreen surface
5684          * with undefined data. */
5685         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5686         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5687
5688         TRACE("Copying onscreen depth buffer to depth texture.\n");
5689
5690         ENTER_GL();
5691
5692         if (!device->depth_blt_texture)
5693         {
5694             glGenTextures(1, &device->depth_blt_texture);
5695         }
5696
5697         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5698          * directly on the FBO texture. That's because we need to flip. */
5699         context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
5700         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5701         {
5702             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5703             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5704         }
5705         else
5706         {
5707             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5708             bind_target = GL_TEXTURE_2D;
5709         }
5710         glBindTexture(bind_target, device->depth_blt_texture);
5711         glCopyTexImage2D(bind_target, surface->texture_level, surface->resource.format->glInternal, 0, 0, w, h, 0);
5712         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5713         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5714         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5715         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5716         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5717         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5718         glBindTexture(bind_target, old_binding);
5719
5720         /* Setup the destination */
5721         if (!device->depth_blt_rb)
5722         {
5723             gl_info->fbo_ops.glGenRenderbuffers(1, &device->depth_blt_rb);
5724             checkGLcall("glGenRenderbuffersEXT");
5725         }
5726         if (device->depth_blt_rb_w != w || device->depth_blt_rb_h != h)
5727         {
5728             gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, device->depth_blt_rb);
5729             checkGLcall("glBindRenderbufferEXT");
5730             gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, w, h);
5731             checkGLcall("glRenderbufferStorageEXT");
5732             device->depth_blt_rb_w = w;
5733             device->depth_blt_rb_h = h;
5734         }
5735
5736         context_bind_fbo(context, GL_FRAMEBUFFER, &context->dst_fbo);
5737         gl_info->fbo_ops.glFramebufferRenderbuffer(GL_FRAMEBUFFER,
5738                 GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, device->depth_blt_rb);
5739         checkGLcall("glFramebufferRenderbufferEXT");
5740         context_attach_depth_stencil_fbo(context, GL_FRAMEBUFFER, surface, FALSE);
5741
5742         /* Do the actual blit */
5743         surface_depth_blt(surface, gl_info, device->depth_blt_texture, w, h, bind_target);
5744         checkGLcall("depth_blt");
5745
5746         if (context->current_fbo) context_bind_fbo(context, GL_FRAMEBUFFER, &context->current_fbo->id);
5747         else context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
5748
5749         LEAVE_GL();
5750
5751         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5752     }
5753     else if (location == SFLAG_DS_ONSCREEN)
5754     {
5755         TRACE("Copying depth texture to onscreen depth buffer.\n");
5756
5757         ENTER_GL();
5758
5759         context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
5760         surface_depth_blt(surface, gl_info, surface->texture_name,
5761                 w, h, surface->texture_target);
5762         checkGLcall("depth_blt");
5763
5764         if (context->current_fbo) context_bind_fbo(context, GL_FRAMEBUFFER, &context->current_fbo->id);
5765
5766         LEAVE_GL();
5767
5768         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5769     }
5770     else
5771     {
5772         ERR("Invalid location (%#x) specified.\n", location);
5773     }
5774
5775     surface->flags |= location;
5776     surface->ds_current_size.cx = surface->resource.width;
5777     surface->ds_current_size.cy = surface->resource.height;
5778 }
5779
5780 void surface_modify_location(struct wined3d_surface *surface, DWORD flag, BOOL persistent)
5781 {
5782     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5783     struct wined3d_surface *overlay;
5784
5785     TRACE("surface %p, location %s, persistent %#x.\n",
5786             surface, debug_surflocation(flag), persistent);
5787
5788     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
5789     {
5790         if (surface_is_offscreen(surface))
5791         {
5792             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
5793             if (flag & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)) flag |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
5794         }
5795         else
5796         {
5797             TRACE("Surface %p is an onscreen surface.\n", surface);
5798         }
5799     }
5800
5801     if (flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5802             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5803     {
5804         flag |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5805     }
5806
5807     if (persistent)
5808     {
5809         if (((surface->flags & SFLAG_INTEXTURE) && !(flag & SFLAG_INTEXTURE))
5810                 || ((surface->flags & SFLAG_INSRGBTEX) && !(flag & SFLAG_INSRGBTEX)))
5811         {
5812             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5813             {
5814                 TRACE("Passing to container.\n");
5815                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5816             }
5817         }
5818         surface->flags &= ~SFLAG_LOCATIONS;
5819         surface->flags |= flag;
5820
5821         /* Redraw emulated overlays, if any */
5822         if (flag & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5823         {
5824             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5825             {
5826                 overlay->surface_ops->surface_draw_overlay(overlay);
5827             }
5828         }
5829     }
5830     else
5831     {
5832         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5833         {
5834             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5835             {
5836                 TRACE("Passing to container\n");
5837                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5838             }
5839         }
5840         surface->flags &= ~flag;
5841     }
5842
5843     if (!(surface->flags & SFLAG_LOCATIONS))
5844     {
5845         ERR("Surface %p does not have any up to date location.\n", surface);
5846     }
5847 }
5848
5849 static DWORD resource_access_from_location(DWORD location)
5850 {
5851     switch (location)
5852     {
5853         case SFLAG_INSYSMEM:
5854             return WINED3D_RESOURCE_ACCESS_CPU;
5855
5856         case SFLAG_INDRAWABLE:
5857         case SFLAG_INSRGBTEX:
5858         case SFLAG_INTEXTURE:
5859             return WINED3D_RESOURCE_ACCESS_GPU;
5860
5861         default:
5862             FIXME("Unhandled location %#x.\n", location);
5863             return 0;
5864     }
5865 }
5866
5867 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD flag, const RECT *rect)
5868 {
5869     struct wined3d_device *device = surface->resource.device;
5870     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5871     BOOL drawable_read_ok = surface_is_offscreen(surface);
5872     struct wined3d_format format;
5873     CONVERT_TYPES convert;
5874     int width, pitch, outpitch;
5875     BYTE *mem;
5876     BOOL in_fbo = FALSE;
5877
5878     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(flag), wine_dbgstr_rect(rect));
5879
5880     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5881     {
5882         if (flag == SFLAG_INTEXTURE)
5883         {
5884             struct wined3d_context *context = context_acquire(device, NULL);
5885             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
5886             context_release(context);
5887             return WINED3D_OK;
5888         }
5889         else
5890         {
5891             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(flag));
5892             return WINED3DERR_INVALIDCALL;
5893         }
5894     }
5895
5896     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
5897     {
5898         if (surface_is_offscreen(surface))
5899         {
5900             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets.
5901              * Prefer SFLAG_INTEXTURE. */
5902             if (flag == SFLAG_INDRAWABLE) flag = SFLAG_INTEXTURE;
5903             drawable_read_ok = FALSE;
5904             in_fbo = TRUE;
5905         }
5906         else
5907         {
5908             TRACE("Surface %p is an onscreen surface.\n", surface);
5909         }
5910     }
5911
5912     if (flag == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5913     {
5914         flag = SFLAG_INTEXTURE;
5915     }
5916
5917     if (surface->flags & flag)
5918     {
5919         TRACE("Location already up to date\n");
5920         return WINED3D_OK;
5921     }
5922
5923     if (WARN_ON(d3d_surface))
5924     {
5925         DWORD required_access = resource_access_from_location(flag);
5926         if ((surface->resource.access_flags & required_access) != required_access)
5927             WARN("Operation requires %#x access, but surface only has %#x.\n",
5928                     required_access, surface->resource.access_flags);
5929     }
5930
5931     if (!(surface->flags & SFLAG_LOCATIONS))
5932     {
5933         ERR("Surface %p does not have any up to date location.\n", surface);
5934         surface->flags |= SFLAG_LOST;
5935         return WINED3DERR_DEVICELOST;
5936     }
5937
5938     if (flag == SFLAG_INSYSMEM)
5939     {
5940         surface_prepare_system_memory(surface);
5941
5942         /* Download the surface to system memory */
5943         if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5944         {
5945             struct wined3d_context *context = NULL;
5946
5947             if (!device->isInDraw) context = context_acquire(device, NULL);
5948
5949             surface_bind_and_dirtify(surface, gl_info, !(surface->flags & SFLAG_INTEXTURE));
5950             surface_download_data(surface, gl_info);
5951
5952             if (context) context_release(context);
5953         }
5954         else
5955         {
5956             /* Note: It might be faster to download into a texture first. */
5957             read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5958                     wined3d_surface_get_pitch(surface));
5959         }
5960     }
5961     else if (flag == SFLAG_INDRAWABLE)
5962     {
5963         if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5964             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5965
5966         if (surface->flags & SFLAG_INTEXTURE)
5967         {
5968             RECT r;
5969
5970             surface_get_rect(surface, rect, &r);
5971             surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5972         }
5973         else
5974         {
5975             int byte_count;
5976             if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5977             {
5978                 /* This needs a shader to convert the srgb data sampled from the GL texture into RGB
5979                  * values, otherwise we get incorrect values in the target. For now go the slow way
5980                  * via a system memory copy
5981                  */
5982                 surface_load_location(surface, SFLAG_INSYSMEM, rect);
5983             }
5984
5985             d3dfmt_get_conv(surface, FALSE /* We need color keying */,
5986                     FALSE /* We won't use textures */, &format, &convert);
5987
5988             /* The width is in 'length' not in bytes */
5989             width = surface->resource.width;
5990             pitch = wined3d_surface_get_pitch(surface);
5991
5992             /* Don't use PBOs for converted surfaces. During PBO conversion we look at SFLAG_CONVERTED
5993              * but it isn't set (yet) in all cases it is getting called. */
5994             if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5995             {
5996                 struct wined3d_context *context = NULL;
5997
5998                 TRACE("Removing the pbo attached to surface %p.\n", surface);
5999
6000                 if (!device->isInDraw) context = context_acquire(device, NULL);
6001                 surface_remove_pbo(surface, gl_info);
6002                 if (context) context_release(context);
6003             }
6004
6005             if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
6006             {
6007                 int height = surface->resource.height;
6008                 byte_count = format.conv_byte_count;
6009
6010                 /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6011                 outpitch = width * byte_count;
6012                 outpitch = (outpitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6013
6014                 mem = HeapAlloc(GetProcessHeap(), 0, outpitch * height);
6015                 if(!mem) {
6016                     ERR("Out of memory %d, %d!\n", outpitch, height);
6017                     return WINED3DERR_OUTOFVIDEOMEMORY;
6018                 }
6019                 d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, pitch,
6020                         width, height, outpitch, convert, surface);
6021
6022                 surface->flags |= SFLAG_CONVERTED;
6023             }
6024             else
6025             {
6026                 surface->flags &= ~SFLAG_CONVERTED;
6027                 mem = surface->resource.allocatedMemory;
6028                 byte_count = format.byte_count;
6029             }
6030
6031             flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
6032
6033             /* Don't delete PBO memory */
6034             if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6035                 HeapFree(GetProcessHeap(), 0, mem);
6036         }
6037     }
6038     else /* if(flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) */
6039     {
6040         const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
6041
6042         if (drawable_read_ok && (surface->flags & SFLAG_INDRAWABLE))
6043         {
6044             read_from_framebuffer_texture(surface, flag == SFLAG_INSRGBTEX);
6045         }
6046         else if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6047                 && (surface->resource.format->flags & attach_flags) == attach_flags
6048                 && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6049                         NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6050                         NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6051         {
6052             DWORD src_location = flag == SFLAG_INSRGBTEX ? SFLAG_INTEXTURE : SFLAG_INSRGBTEX;
6053             RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6054
6055             surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6056                     surface, src_location, &rect, surface, flag, &rect);
6057         }
6058         else
6059         {
6060             /* Upload from system memory */
6061             BOOL srgb = flag == SFLAG_INSRGBTEX;
6062             struct wined3d_context *context = NULL;
6063
6064             d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6065                     TRUE /* We will use textures */, &format, &convert);
6066
6067             if (srgb)
6068             {
6069                 if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6070                 {
6071                     /* Performance warning... */
6072                     FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6073                     surface_load_location(surface, SFLAG_INSYSMEM, rect);
6074                 }
6075             }
6076             else
6077             {
6078                 if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6079                 {
6080                     /* Performance warning... */
6081                     FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6082                     surface_load_location(surface, SFLAG_INSYSMEM, rect);
6083                 }
6084             }
6085             if (!(surface->flags & SFLAG_INSYSMEM))
6086             {
6087                 WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6088                 /* Lets hope we get it from somewhere... */
6089                 surface_load_location(surface, SFLAG_INSYSMEM, rect);
6090             }
6091
6092             if (!device->isInDraw) context = context_acquire(device, NULL);
6093
6094             surface_prepare_texture(surface, gl_info, srgb);
6095             surface_bind_and_dirtify(surface, gl_info, srgb);
6096
6097             if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6098             {
6099                 surface->flags |= SFLAG_GLCKEY;
6100                 surface->glCKey = surface->SrcBltCKey;
6101             }
6102             else surface->flags &= ~SFLAG_GLCKEY;
6103
6104             /* The width is in 'length' not in bytes */
6105             width = surface->resource.width;
6106             pitch = wined3d_surface_get_pitch(surface);
6107
6108             /* Don't use PBOs for converted surfaces. During PBO conversion we look at SFLAG_CONVERTED
6109              * but it isn't set (yet) in all cases it is getting called. */
6110             if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6111             {
6112                 TRACE("Removing the pbo attached to surface %p.\n", surface);
6113                 surface_remove_pbo(surface, gl_info);
6114             }
6115
6116             if (format.convert)
6117             {
6118                 /* This code is entered for texture formats which need a fixup. */
6119                 UINT height = surface->resource.height;
6120
6121                 /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6122                 outpitch = width * format.conv_byte_count;
6123                 outpitch = (outpitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6124
6125                 mem = HeapAlloc(GetProcessHeap(), 0, outpitch * height);
6126                 if(!mem) {
6127                     ERR("Out of memory %d, %d!\n", outpitch, height);
6128                     if (context) context_release(context);
6129                     return WINED3DERR_OUTOFVIDEOMEMORY;
6130                 }
6131                 format.convert(surface->resource.allocatedMemory, mem, pitch, width, height);
6132             }
6133             else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6134             {
6135                 /* This code is only entered for color keying fixups */
6136                 UINT height = surface->resource.height;
6137
6138                 /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6139                 outpitch = width * format.conv_byte_count;
6140                 outpitch = (outpitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6141
6142                 mem = HeapAlloc(GetProcessHeap(), 0, outpitch * height);
6143                 if(!mem) {
6144                     ERR("Out of memory %d, %d!\n", outpitch, height);
6145                     if (context) context_release(context);
6146                     return WINED3DERR_OUTOFVIDEOMEMORY;
6147                 }
6148                 d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, pitch,
6149                         width, height, outpitch, convert, surface);
6150             }
6151             else
6152             {
6153                 mem = surface->resource.allocatedMemory;
6154             }
6155
6156             /* Make sure the correct pitch is used */
6157             ENTER_GL();
6158             glPixelStorei(GL_UNPACK_ROW_LENGTH, width);
6159             LEAVE_GL();
6160
6161             if (mem || (surface->flags & SFLAG_PBO))
6162                 surface_upload_data(surface, gl_info, &format, srgb, mem);
6163
6164             /* Restore the default pitch */
6165             ENTER_GL();
6166             glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
6167             LEAVE_GL();
6168
6169             if (context) context_release(context);
6170
6171             /* Don't delete PBO memory */
6172             if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6173                 HeapFree(GetProcessHeap(), 0, mem);
6174         }
6175     }
6176
6177     if (!rect)
6178     {
6179         surface->flags |= flag;
6180
6181         if (flag != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6182             surface_evict_sysmem(surface);
6183     }
6184
6185     if (in_fbo && (surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)))
6186     {
6187         /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
6188         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
6189     }
6190
6191     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6192             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6193     {
6194         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6195     }
6196
6197     return WINED3D_OK;
6198 }
6199
6200 BOOL surface_is_offscreen(struct wined3d_surface *surface)
6201 {
6202     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6203
6204     /* Not on a swapchain - must be offscreen */
6205     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6206
6207     /* The front buffer is always onscreen */
6208     if (surface == swapchain->front_buffer) return FALSE;
6209
6210     /* If the swapchain is rendered to an FBO, the backbuffer is
6211      * offscreen, otherwise onscreen */
6212     return swapchain->render_to_fbo;
6213 }
6214
6215 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6218
6219 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6220 /* Context activation is done by the caller. */
6221 static void ffp_blit_p8_upload_palette(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6222 {
6223     BYTE table[256][4];
6224     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6225
6226     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6227
6228     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6229     ENTER_GL();
6230     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6231     LEAVE_GL();
6232 }
6233
6234 /* Context activation is done by the caller. */
6235 static HRESULT ffp_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6236 {
6237     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6238
6239     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6240      * else the surface is converted in software at upload time in LoadLocation.
6241      */
6242     if(fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6243         ffp_blit_p8_upload_palette(surface, gl_info);
6244
6245     ENTER_GL();
6246     glEnable(surface->texture_target);
6247     checkGLcall("glEnable(surface->texture_target)");
6248     LEAVE_GL();
6249     return WINED3D_OK;
6250 }
6251
6252 /* Context activation is done by the caller. */
6253 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6254 {
6255     ENTER_GL();
6256     glDisable(GL_TEXTURE_2D);
6257     checkGLcall("glDisable(GL_TEXTURE_2D)");
6258     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6259     {
6260         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6261         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6262     }
6263     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6264     {
6265         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6266         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6267     }
6268     LEAVE_GL();
6269 }
6270
6271 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6272         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6273         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6274 {
6275     enum complex_fixup src_fixup;
6276
6277     switch (blit_op)
6278     {
6279         case WINED3D_BLIT_OP_COLOR_BLIT:
6280             src_fixup = get_complex_fixup(src_format->color_fixup);
6281             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6282             {
6283                 TRACE("Checking support for fixup:\n");
6284                 dump_color_fixup_desc(src_format->color_fixup);
6285             }
6286
6287             if (!is_identity_fixup(dst_format->color_fixup))
6288             {
6289                 TRACE("Destination fixups are not supported\n");
6290                 return FALSE;
6291             }
6292
6293             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6294             {
6295                 TRACE("P8 fixup supported\n");
6296                 return TRUE;
6297             }
6298
6299             /* We only support identity conversions. */
6300             if (is_identity_fixup(src_format->color_fixup))
6301             {
6302                 TRACE("[OK]\n");
6303                 return TRUE;
6304             }
6305
6306             TRACE("[FAILED]\n");
6307             return FALSE;
6308
6309         case WINED3D_BLIT_OP_COLOR_FILL:
6310             if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6311             {
6312                 TRACE("Color fill not supported\n");
6313                 return FALSE;
6314             }
6315
6316             return TRUE;
6317
6318         case WINED3D_BLIT_OP_DEPTH_FILL:
6319             return TRUE;
6320
6321         default:
6322             TRACE("Unsupported blit_op=%d\n", blit_op);
6323             return FALSE;
6324     }
6325 }
6326
6327 /* Do not call while under the GL lock. */
6328 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6329         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6330 {
6331     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6332
6333     return device_clear_render_targets(device, 1, &dst_surface, NULL,
6334             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6335 }
6336
6337 /* Do not call while under the GL lock. */
6338 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6339         struct wined3d_surface *surface, const RECT *rect, float depth)
6340 {
6341     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6342
6343     return device_clear_render_targets(device, 0, NULL, surface,
6344             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6345 }
6346
6347 const struct blit_shader ffp_blit =  {
6348     ffp_blit_alloc,
6349     ffp_blit_free,
6350     ffp_blit_set,
6351     ffp_blit_unset,
6352     ffp_blit_supported,
6353     ffp_blit_color_fill,
6354     ffp_blit_depth_fill,
6355 };
6356
6357 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6358 {
6359     return WINED3D_OK;
6360 }
6361
/* Release hook of the CPU blitter; intentionally empty.
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* No state to tear down. */
}
6366
6367 /* Context activation is done by the caller. */
6368 static HRESULT cpu_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6369 {
6370     return WINED3D_OK;
6371 }
6372
/* Teardown hook of the CPU blitter; intentionally empty.
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* No state to restore. */
}
6377
6378 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6379         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6380         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6381 {
6382     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6383     {
6384         return TRUE;
6385     }
6386
6387     return FALSE;
6388 }
6389
6390 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6391         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6392         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6393 {
6394     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6395     const struct wined3d_format *src_format, *dst_format;
6396     struct wined3d_surface *orig_src = src_surface;
6397     WINED3DLOCKED_RECT dlock, slock;
6398     HRESULT hr = WINED3D_OK;
6399     const BYTE *sbuf;
6400     RECT xdst,xsrc;
6401     BYTE *dbuf;
6402     int x, y;
6403
6404     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6405             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6406             flags, fx, debug_d3dtexturefiltertype(filter));
6407
6408     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
6409     {
6410         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY\n");
6411         return WINEDDERR_SURFACEBUSY;
6412     }
6413
6414     /* First check for the validity of source / destination rectangles.
6415      * This was verified using a test application and by MSDN. */
6416     if (src_rect)
6417     {
6418         if (src_surface)
6419         {
6420             if (src_rect->right < src_rect->left || src_rect->bottom < src_rect->top
6421                     || src_rect->left > src_surface->resource.width || src_rect->left < 0
6422                     || src_rect->top > src_surface->resource.height || src_rect->top < 0
6423                     || src_rect->right > src_surface->resource.width || src_rect->right < 0
6424                     || src_rect->bottom > src_surface->resource.height || src_rect->bottom < 0)
6425             {
6426                 WARN("Application gave us bad source rectangle for Blt.\n");
6427                 return WINEDDERR_INVALIDRECT;
6428             }
6429
6430             if (!src_rect->right || !src_rect->bottom
6431                     || src_rect->left == (int)src_surface->resource.width
6432                     || src_rect->top == (int)src_surface->resource.height)
6433             {
6434                 TRACE("Nothing to be done.\n");
6435                 return WINED3D_OK;
6436             }
6437         }
6438
6439         xsrc = *src_rect;
6440     }
6441     else if (src_surface)
6442     {
6443         xsrc.left = 0;
6444         xsrc.top = 0;
6445         xsrc.right = src_surface->resource.width;
6446         xsrc.bottom = src_surface->resource.height;
6447     }
6448     else
6449     {
6450         memset(&xsrc, 0, sizeof(xsrc));
6451     }
6452
6453     if (dst_rect)
6454     {
6455         /* For the Destination rect, it can be out of bounds on the condition
6456          * that a clipper is set for the given surface. */
6457         if (!dst_surface->clipper && (dst_rect->right < dst_rect->left || dst_rect->bottom < dst_rect->top
6458                 || dst_rect->left > dst_surface->resource.width || dst_rect->left < 0
6459                 || dst_rect->top > dst_surface->resource.height || dst_rect->top < 0
6460                 || dst_rect->right > dst_surface->resource.width || dst_rect->right < 0
6461                 || dst_rect->bottom > dst_surface->resource.height || dst_rect->bottom < 0))
6462         {
6463             WARN("Application gave us bad destination rectangle for Blt without a clipper set.\n");
6464             return WINEDDERR_INVALIDRECT;
6465         }
6466
6467         if (dst_rect->right <= 0 || dst_rect->bottom <= 0
6468                 || dst_rect->left >= (int)dst_surface->resource.width
6469                 || dst_rect->top >= (int)dst_surface->resource.height)
6470         {
6471             TRACE("Nothing to be done.\n");
6472             return WINED3D_OK;
6473         }
6474
6475         if (!src_surface)
6476         {
6477             RECT full_rect;
6478
6479             full_rect.left = 0;
6480             full_rect.top = 0;
6481             full_rect.right = dst_surface->resource.width;
6482             full_rect.bottom = dst_surface->resource.height;
6483             IntersectRect(&xdst, &full_rect, dst_rect);
6484         }
6485         else
6486         {
6487             BOOL clip_horiz, clip_vert;
6488
6489             xdst = *dst_rect;
6490             clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6491             clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6492
6493             if (clip_vert || clip_horiz)
6494             {
6495                 /* Now check if this is a special case or not... */
6496                 if ((flags & WINEDDBLT_DDFX)
6497                         || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6498                         || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6499                 {
6500                     WARN("Out of screen rectangle in special case. Not handled right now.\n");
6501                     return WINED3D_OK;
6502                 }
6503
6504                 if (clip_horiz)
6505                 {
6506                     if (xdst.left < 0)
6507                     {
6508                         xsrc.left -= xdst.left;
6509                         xdst.left = 0;
6510                     }
6511                     if (xdst.right > dst_surface->resource.width)
6512                     {
6513                         xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6514                         xdst.right = (int)dst_surface->resource.width;
6515                     }
6516                 }
6517
6518                 if (clip_vert)
6519                 {
6520                     if (xdst.top < 0)
6521                     {
6522                         xsrc.top -= xdst.top;
6523                         xdst.top = 0;
6524                     }
6525                     if (xdst.bottom > dst_surface->resource.height)
6526                     {
6527                         xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6528                         xdst.bottom = (int)dst_surface->resource.height;
6529                     }
6530                 }
6531
6532                 /* And check if after clipping something is still to be done... */
6533                 if ((xdst.right <= 0) || (xdst.bottom <= 0)
6534                         || (xdst.left >= (int)dst_surface->resource.width)
6535                         || (xdst.top >= (int)dst_surface->resource.height)
6536                         || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6537                         || (xsrc.left >= (int)src_surface->resource.width)
6538                         || (xsrc.top >= (int)src_surface->resource.height))
6539                 {
6540                     TRACE("Nothing to be done after clipping.\n");
6541                     return WINED3D_OK;
6542                 }
6543             }
6544         }
6545     }
6546     else
6547     {
6548         xdst.left = 0;
6549         xdst.top = 0;
6550         xdst.right = dst_surface->resource.width;
6551         xdst.bottom = dst_surface->resource.height;
6552     }
6553
6554     if (src_surface == dst_surface)
6555     {
6556         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6557         slock = dlock;
6558         src_format = dst_surface->resource.format;
6559         dst_format = src_format;
6560     }
6561     else
6562     {
6563         dst_format = dst_surface->resource.format;
6564         if (src_surface)
6565         {
6566             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6567             {
6568                 src_surface = surface_convert_format(src_surface, dst_format->id);
6569                 if (!src_surface)
6570                 {
6571                     /* The conv function writes a FIXME */
6572                     WARN("Cannot convert source surface format to dest format.\n");
6573                     goto release;
6574                 }
6575             }
6576             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6577             src_format = src_surface->resource.format;
6578         }
6579         else
6580         {
6581             src_format = dst_format;
6582         }
6583         if (dst_rect)
6584             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6585         else
6586             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6587     }
6588
6589     if (!fx || !(fx->dwDDFX)) flags &= ~WINEDDBLT_DDFX;
6590
6591     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_FOURCC)
6592     {
6593         if (!dst_rect || src_surface == dst_surface)
6594         {
6595             memcpy(dlock.pBits, slock.pBits, dst_surface->resource.size);
6596             goto release;
6597         }
6598     }
6599
6600     bpp = dst_surface->resource.format->byte_count;
6601     srcheight = xsrc.bottom - xsrc.top;
6602     srcwidth = xsrc.right - xsrc.left;
6603     dstheight = xdst.bottom - xdst.top;
6604     dstwidth = xdst.right - xdst.left;
6605     width = (xdst.right - xdst.left) * bpp;
6606
6607     if (dst_rect && src_surface != dst_surface)
6608         dbuf = dlock.pBits;
6609     else
6610         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6611
6612     if (flags & WINEDDBLT_WAIT)
6613     {
6614         flags &= ~WINEDDBLT_WAIT;
6615     }
6616     if (flags & WINEDDBLT_ASYNC)
6617     {
6618         static BOOL displayed = FALSE;
6619         if (!displayed)
6620             FIXME("Can't handle WINEDDBLT_ASYNC flag right now.\n");
6621         displayed = TRUE;
6622         flags &= ~WINEDDBLT_ASYNC;
6623     }
6624     if (flags & WINEDDBLT_DONOTWAIT)
6625     {
6626         /* WINEDDBLT_DONOTWAIT appeared in DX7 */
6627         static BOOL displayed = FALSE;
6628         if (!displayed)
6629             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag right now.\n");
6630         displayed = TRUE;
6631         flags &= ~WINEDDBLT_DONOTWAIT;
6632     }
6633
6634     /* First, all the 'source-less' blits */
6635     if (flags & WINEDDBLT_COLORFILL)
6636     {
6637         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6638         flags &= ~WINEDDBLT_COLORFILL;
6639     }
6640
6641     if (flags & WINEDDBLT_DEPTHFILL)
6642     {
6643         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6644     }
6645     if (flags & WINEDDBLT_ROP)
6646     {
6647         /* Catch some degenerate cases here. */
6648         switch (fx->dwROP)
6649         {
6650             case BLACKNESS:
6651                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6652                 break;
6653             case 0xAA0029: /* No-op */
6654                 break;
6655             case WHITENESS:
6656                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6657                 break;
6658             case SRCCOPY: /* Well, we do that below? */
6659                 break;
6660             default:
6661                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6662                 goto error;
6663         }
6664         flags &= ~WINEDDBLT_ROP;
6665     }
6666     if (flags & WINEDDBLT_DDROPS)
6667     {
6668         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6669     }
6670     /* Now the 'with source' blits. */
6671     if (src_surface)
6672     {
6673         const BYTE *sbase;
6674         int sx, xinc, sy, yinc;
6675
6676         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6677             goto release;
6678
6679         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6680                 && (srcwidth != dstwidth || srcheight != dstheight))
6681         {
6682             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6683             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6684         }
6685
6686         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6687         xinc = (srcwidth << 16) / dstwidth;
6688         yinc = (srcheight << 16) / dstheight;
6689
6690         if (!flags)
6691         {
6692             /* No effects, we can cheat here. */
6693             if (dstwidth == srcwidth)
6694             {
6695                 if (dstheight == srcheight)
6696                 {
6697                     /* No stretching in either direction. This needs to be as
6698                      * fast as possible. */
6699                     sbuf = sbase;
6700
6701                     /* Check for overlapping surfaces. */
6702                     if (src_surface != dst_surface || xdst.top < xsrc.top
6703                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6704                     {
6705                         /* No overlap, or dst above src, so copy from top downwards. */
6706                         for (y = 0; y < dstheight; ++y)
6707                         {
6708                             memcpy(dbuf, sbuf, width);
6709                             sbuf += slock.Pitch;
6710                             dbuf += dlock.Pitch;
6711                         }
6712                     }
6713                     else if (xdst.top > xsrc.top)
6714                     {
6715                         /* Copy from bottom upwards. */
6716                         sbuf += (slock.Pitch*dstheight);
6717                         dbuf += (dlock.Pitch*dstheight);
6718                         for (y = 0; y < dstheight; ++y)
6719                         {
6720                             sbuf -= slock.Pitch;
6721                             dbuf -= dlock.Pitch;
6722                             memcpy(dbuf, sbuf, width);
6723                         }
6724                     }
6725                     else
6726                     {
6727                         /* Src and dst overlapping on the same line, use memmove. */
6728                         for (y = 0; y < dstheight; ++y)
6729                         {
6730                             memmove(dbuf, sbuf, width);
6731                             sbuf += slock.Pitch;
6732                             dbuf += dlock.Pitch;
6733                         }
6734                     }
6735                 }
6736                 else
6737                 {
6738                     /* Stretching in y direction only. */
6739                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6740                     {
6741                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6742                         memcpy(dbuf, sbuf, width);
6743                         dbuf += dlock.Pitch;
6744                     }
6745                 }
6746             }
6747             else
6748             {
6749                 /* Stretching in X direction. */
6750                 int last_sy = -1;
6751                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6752                 {
6753                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6754
6755                     if ((sy >> 16) == (last_sy >> 16))
6756                     {
6757                         /* This source row is the same as last source row -
6758                          * Copy the already stretched row. */
6759                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6760                     }
6761                     else
6762                     {
6763 #define STRETCH_ROW(type) \
6764 do { \
6765     const type *s = (const type *)sbuf; \
6766     type *d = (type *)dbuf; \
6767     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6768         d[x] = s[sx >> 16]; \
6769 } while(0)
6770
6771                         switch(bpp)
6772                         {
6773                             case 1:
6774                                 STRETCH_ROW(BYTE);
6775                                 break;
6776                             case 2:
6777                                 STRETCH_ROW(WORD);
6778                                 break;
6779                             case 4:
6780                                 STRETCH_ROW(DWORD);
6781                                 break;
6782                             case 3:
6783                             {
6784                                 const BYTE *s;
6785                                 BYTE *d = dbuf;
6786                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6787                                 {
6788                                     DWORD pixel;
6789
6790                                     s = sbuf + 3 * (sx >> 16);
6791                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6792                                     d[0] = (pixel      ) & 0xff;
6793                                     d[1] = (pixel >>  8) & 0xff;
6794                                     d[2] = (pixel >> 16) & 0xff;
6795                                     d += 3;
6796                                 }
6797                                 break;
6798                             }
6799                             default:
6800                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6801                                 hr = WINED3DERR_NOTAVAILABLE;
6802                                 goto error;
6803                         }
6804 #undef STRETCH_ROW
6805                     }
6806                     dbuf += dlock.Pitch;
6807                     last_sy = sy;
6808                 }
6809             }
6810         }
6811         else
6812         {
6813             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6814             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6815             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6816             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6817             {
6818                 /* The color keying flags are checked for correctness in ddraw */
6819                 if (flags & WINEDDBLT_KEYSRC)
6820                 {
6821                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6822                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6823                 }
6824                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6825                 {
6826                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6827                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6828                 }
6829
6830                 if (flags & WINEDDBLT_KEYDEST)
6831                 {
6832                     /* Destination color keys are taken from the source surface! */
6833                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6834                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6835                 }
6836                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6837                 {
6838                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6839                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6840                 }
6841
6842                 if (bpp == 1)
6843                 {
6844                     keymask = 0xff;
6845                 }
6846                 else
6847                 {
6848                     keymask = src_format->red_mask
6849                             | src_format->green_mask
6850                             | src_format->blue_mask;
6851                 }
6852                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6853             }
6854
6855             if (flags & WINEDDBLT_DDFX)
6856             {
6857                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6858                 LONG tmpxy;
6859                 dTopLeft     = dbuf;
6860                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6861                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6862                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6863
6864                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6865                 {
6866                     /* I don't think we need to do anything about this flag */
6867                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6868                 }
6869                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6870                 {
6871                     tmp          = dTopRight;
6872                     dTopRight    = dTopLeft;
6873                     dTopLeft     = tmp;
6874                     tmp          = dBottomRight;
6875                     dBottomRight = dBottomLeft;
6876                     dBottomLeft  = tmp;
6877                     dstxinc = dstxinc * -1;
6878                 }
6879                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6880                 {
6881                     tmp          = dTopLeft;
6882                     dTopLeft     = dBottomLeft;
6883                     dBottomLeft  = tmp;
6884                     tmp          = dTopRight;
6885                     dTopRight    = dBottomRight;
6886                     dBottomRight = tmp;
6887                     dstyinc = dstyinc * -1;
6888                 }
6889                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6890                 {
6891                     /* I don't think we need to do anything about this flag */
6892                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6893                 }
6894                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6895                 {
6896                     tmp          = dBottomRight;
6897                     dBottomRight = dTopLeft;
6898                     dTopLeft     = tmp;
6899                     tmp          = dBottomLeft;
6900                     dBottomLeft  = dTopRight;
6901                     dTopRight    = tmp;
6902                     dstxinc = dstxinc * -1;
6903                     dstyinc = dstyinc * -1;
6904                 }
6905                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6906                 {
6907                     tmp          = dTopLeft;
6908                     dTopLeft     = dBottomLeft;
6909                     dBottomLeft  = dBottomRight;
6910                     dBottomRight = dTopRight;
6911                     dTopRight    = tmp;
6912                     tmpxy   = dstxinc;
6913                     dstxinc = dstyinc;
6914                     dstyinc = tmpxy;
6915                     dstxinc = dstxinc * -1;
6916                 }
6917                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6918                 {
6919                     tmp          = dTopLeft;
6920                     dTopLeft     = dTopRight;
6921                     dTopRight    = dBottomRight;
6922                     dBottomRight = dBottomLeft;
6923                     dBottomLeft  = tmp;
6924                     tmpxy   = dstxinc;
6925                     dstxinc = dstyinc;
6926                     dstyinc = tmpxy;
6927                     dstyinc = dstyinc * -1;
6928                 }
6929                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6930                 {
6931                     /* I don't think we need to do anything about this flag */
6932                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6933                 }
6934                 dbuf = dTopLeft;
6935                 flags &= ~(WINEDDBLT_DDFX);
6936             }
6937
6938 #define COPY_COLORKEY_FX(type) \
6939 do { \
6940     const type *s; \
6941     type *d = (type *)dbuf, *dx, tmp; \
6942     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6943     { \
6944         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6945         dx = d; \
6946         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6947         { \
6948             tmp = s[sx >> 16]; \
6949             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6950                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6951             { \
6952                 dx[0] = tmp; \
6953             } \
6954             dx = (type *)(((BYTE *)dx) + dstxinc); \
6955         } \
6956         d = (type *)(((BYTE *)d) + dstyinc); \
6957     } \
6958 } while(0)
6959
6960             switch (bpp)
6961             {
6962                 case 1:
6963                     COPY_COLORKEY_FX(BYTE);
6964                     break;
6965                 case 2:
6966                     COPY_COLORKEY_FX(WORD);
6967                     break;
6968                 case 4:
6969                     COPY_COLORKEY_FX(DWORD);
6970                     break;
6971                 case 3:
6972                 {
6973                     const BYTE *s;
6974                     BYTE *d = dbuf, *dx;
6975                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6976                     {
6977                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6978                         dx = d;
6979                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
6980                         {
6981                             DWORD pixel, dpixel = 0;
6982                             s = sbuf + 3 * (sx>>16);
6983                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6984                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
6985                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
6986                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
6987                             {
6988                                 dx[0] = (pixel      ) & 0xff;
6989                                 dx[1] = (pixel >>  8) & 0xff;
6990                                 dx[2] = (pixel >> 16) & 0xff;
6991                             }
6992                             dx += dstxinc;
6993                         }
6994                         d += dstyinc;
6995                     }
6996                     break;
6997                 }
6998                 default:
6999                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7000                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7001                     hr = WINED3DERR_NOTAVAILABLE;
7002                     goto error;
7003 #undef COPY_COLORKEY_FX
7004             }
7005         }
7006     }
7007
7008 error:
7009     if (flags && FIXME_ON(d3d_surface))
7010     {
7011         FIXME("\tUnsupported flags: %#x.\n", flags);
7012     }
7013
7014 release:
7015     wined3d_surface_unmap(dst_surface);
7016     if (src_surface && src_surface != dst_surface)
7017         wined3d_surface_unmap(src_surface);
7018     /* Release the converted surface, if any. */
7019     if (src_surface && src_surface != orig_src)
7020         wined3d_surface_decref(src_surface);
7021
7022     return hr;
7023 }
7024
7025 static HRESULT surface_cpu_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
7026         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
7027 {
7028     const struct wined3d_format *src_format, *dst_format;
7029     RECT lock_src, lock_dst, lock_union;
7030     WINED3DLOCKED_RECT dlock, slock;
7031     HRESULT hr = WINED3D_OK;
7032     int bpp, w, h, x, y;
7033     const BYTE *sbuf;
7034     BYTE *dbuf;
7035     RECT rsrc2;
7036
7037     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
7038             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
7039
7040     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface->flags & SFLAG_LOCKED))
7041     {
7042         WARN(" Surface is busy, returning DDERR_SURFACEBUSY\n");
7043         return WINEDDERR_SURFACEBUSY;
7044     }
7045
7046     if (!src_rect)
7047     {
7048         WARN("src_rect is NULL!\n");
7049         rsrc2.left = 0;
7050         rsrc2.top = 0;
7051         rsrc2.right = src_surface->resource.width;
7052         rsrc2.bottom = src_surface->resource.height;
7053         src_rect = &rsrc2;
7054     }
7055
7056     /* Check source rect for validity. Copied from normal Blt. Fixes Baldur's Gate. */
7057     if ((src_rect->bottom > src_surface->resource.height) || (src_rect->bottom < 0)
7058             || (src_rect->top > src_surface->resource.height) || (src_rect->top < 0)
7059             || (src_rect->left > src_surface->resource.width) || (src_rect->left < 0)
7060             || (src_rect->right > src_surface->resource.width) || (src_rect->right < 0)
7061             || (src_rect->right < src_rect->left) || (src_rect->bottom < src_rect->top))
7062     {
7063         WARN("Application gave us bad source rectangle for BltFast.\n");
7064         return WINEDDERR_INVALIDRECT;
7065     }
7066
7067     h = src_rect->bottom - src_rect->top;
7068     if (h > dst_surface->resource.height - dst_y)
7069         h = dst_surface->resource.height - dst_y;
7070     if (h > src_surface->resource.height - src_rect->top)
7071         h = src_surface->resource.height - src_rect->top;
7072     if (h <= 0)
7073         return WINEDDERR_INVALIDRECT;
7074
7075     w = src_rect->right - src_rect->left;
7076     if (w > dst_surface->resource.width - dst_x)
7077         w = dst_surface->resource.width - dst_x;
7078     if (w > src_surface->resource.width - src_rect->left)
7079         w = src_surface->resource.width - src_rect->left;
7080     if (w <= 0)
7081         return WINEDDERR_INVALIDRECT;
7082
7083     /* Now compute the locking rectangle... */
7084     lock_src.left = src_rect->left;
7085     lock_src.top = src_rect->top;
7086     lock_src.right = lock_src.left + w;
7087     lock_src.bottom = lock_src.top + h;
7088
7089     lock_dst.left = dst_x;
7090     lock_dst.top = dst_y;
7091     lock_dst.right = dst_x + w;
7092     lock_dst.bottom = dst_y + h;
7093
7094     bpp = dst_surface->resource.format->byte_count;
7095
7096     /* We need to lock the surfaces, or we won't get refreshes when done. */
7097     if (src_surface == dst_surface)
7098     {
7099         int pitch;
7100
7101         UnionRect(&lock_union, &lock_src, &lock_dst);
7102
7103         /* Lock the union of the two rectangles. */
7104         hr = wined3d_surface_map(dst_surface, &dlock, &lock_union, 0);
7105         if (FAILED(hr))
7106             goto error;
7107
7108         pitch = dlock.Pitch;
7109         slock.Pitch = dlock.Pitch;
7110
7111         /* Since slock was originally copied from this surface's description, we can just reuse it. */
7112         sbuf = dst_surface->resource.allocatedMemory + lock_src.top * pitch + lock_src.left * bpp;
7113         dbuf = dst_surface->resource.allocatedMemory + lock_dst.top * pitch + lock_dst.left * bpp;
7114         src_format = src_surface->resource.format;
7115         dst_format = src_format;
7116     }
7117     else
7118     {
7119         hr = wined3d_surface_map(src_surface, &slock, &lock_src, WINED3DLOCK_READONLY);
7120         if (FAILED(hr))
7121             goto error;
7122         hr = wined3d_surface_map(dst_surface, &dlock, &lock_dst, 0);
7123         if (FAILED(hr))
7124             goto error;
7125
7126         sbuf = slock.pBits;
7127         dbuf = dlock.pBits;
7128         TRACE("Dst is at %p, Src is at %p.\n", dbuf, sbuf);
7129
7130         src_format = src_surface->resource.format;
7131         dst_format = dst_surface->resource.format;
7132     }
7133
7134     /* Handle compressed surfaces first... */
7135     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
7136     {
7137         UINT row_block_count;
7138
7139         TRACE("compressed -> compressed copy\n");
7140         if (trans)
7141             FIXME("trans arg not supported when a compressed surface is involved\n");
7142         if (dst_x || dst_y)
7143             FIXME("offset for destination surface is not supported\n");
7144         if (src_surface->resource.format->id != dst_surface->resource.format->id)
7145         {
7146             FIXME("compressed -> compressed copy only supported for the same type of surface\n");
7147             hr = WINED3DERR_WRONGTEXTUREFORMAT;
7148             goto error;
7149         }
7150
7151         row_block_count = (w + dst_format->block_width - 1) / dst_format->block_width;
7152         for (y = 0; y < h; y += dst_format->block_height)
7153         {
7154             memcpy(dbuf, sbuf, row_block_count * dst_format->block_byte_count);
7155             dbuf += dlock.Pitch;
7156             sbuf += slock.Pitch;
7157         }
7158
7159         goto error;
7160     }
7161     if ((src_format->flags & WINED3DFMT_FLAG_COMPRESSED) && !(dst_format->flags & WINED3DFMT_FLAG_COMPRESSED))
7162     {
7163         /* TODO: Use the libtxc_dxtn.so shared library to do software
7164          * decompression. */
7165         ERR("Software decompression not supported.\n");
7166         goto error;
7167     }
7168
7169     if (trans & (WINEDDBLTFAST_SRCCOLORKEY | WINEDDBLTFAST_DESTCOLORKEY))
7170     {
7171         DWORD keylow, keyhigh;
7172         DWORD mask = src_surface->resource.format->red_mask
7173                 | src_surface->resource.format->green_mask
7174                 | src_surface->resource.format->blue_mask;
7175
7176         /* For some 8-bit formats like L8 and P8 color masks don't make sense */
7177         if (!mask && bpp == 1)
7178             mask = 0xff;
7179
7180         TRACE("Color keyed copy.\n");
7181         if (trans & WINEDDBLTFAST_SRCCOLORKEY)
7182         {
7183             keylow = src_surface->SrcBltCKey.dwColorSpaceLowValue;
7184             keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
7185         }
7186         else
7187         {
7188             /* I'm not sure if this is correct. */
7189             FIXME("WINEDDBLTFAST_DESTCOLORKEY not fully supported yet.\n");
7190             keylow = dst_surface->DestBltCKey.dwColorSpaceLowValue;
7191             keyhigh = dst_surface->DestBltCKey.dwColorSpaceHighValue;
7192         }
7193
7194 #define COPYBOX_COLORKEY(type) \
7195 do { \
7196     const type *s = (const type *)sbuf; \
7197     type *d = (type *)dbuf; \
7198     type tmp; \
7199     for (y = 0; y < h; y++) \
7200     { \
7201         for (x = 0; x < w; x++) \
7202         { \
7203             tmp = s[x]; \
7204             if ((tmp & mask) < keylow || (tmp & mask) > keyhigh) d[x] = tmp; \
7205         } \
7206         s = (const type *)((const BYTE *)s + slock.Pitch); \
7207         d = (type *)((BYTE *)d + dlock.Pitch); \
7208     } \
7209 } while(0)
7210
7211         switch (bpp)
7212         {
7213             case 1:
7214                 COPYBOX_COLORKEY(BYTE);
7215                 break;
7216             case 2:
7217                 COPYBOX_COLORKEY(WORD);
7218                 break;
7219             case 4:
7220                 COPYBOX_COLORKEY(DWORD);
7221                 break;
7222             case 3:
7223             {
7224                 const BYTE *s;
7225                 DWORD tmp;
7226                 BYTE *d;
7227                 s = sbuf;
7228                 d = dbuf;
7229                 for (y = 0; y < h; ++y)
7230                 {
7231                     for (x = 0; x < w * 3; x += 3)
7232                     {
7233                         tmp = (DWORD)s[x] + ((DWORD)s[x + 1] << 8) + ((DWORD)s[x + 2] << 16);
7234                         if (tmp < keylow || tmp > keyhigh)
7235                         {
7236                             d[x + 0] = s[x + 0];
7237                             d[x + 1] = s[x + 1];
7238                             d[x + 2] = s[x + 2];
7239                         }
7240                     }
7241                     s += slock.Pitch;
7242                     d += dlock.Pitch;
7243                 }
7244                 break;
7245             }
7246             default:
7247                 FIXME("Source color key blitting not supported for bpp %u.\n", bpp * 8);
7248                 hr = WINED3DERR_NOTAVAILABLE;
7249                 goto error;
7250         }
7251 #undef COPYBOX_COLORKEY
7252         TRACE("Copy done.\n");
7253     }
7254     else
7255     {
7256         int width = w * bpp;
7257         INT sbufpitch, dbufpitch;
7258
7259         TRACE("No color key copy.\n");
7260         /* Handle overlapping surfaces. */
7261         if (sbuf < dbuf)
7262         {
7263             sbuf += (h - 1) * slock.Pitch;
7264             dbuf += (h - 1) * dlock.Pitch;
7265             sbufpitch = -slock.Pitch;
7266             dbufpitch = -dlock.Pitch;
7267         }
7268         else
7269         {
7270             sbufpitch = slock.Pitch;
7271             dbufpitch = dlock.Pitch;
7272         }
7273         for (y = 0; y < h; ++y)
7274         {
7275             /* This is pretty easy, a line for line memcpy. */
7276             memmove(dbuf, sbuf, width);
7277             sbuf += sbufpitch;
7278             dbuf += dbufpitch;
7279         }
7280         TRACE("Copy done.\n");
7281     }
7282
7283 error:
7284     if (src_surface == dst_surface)
7285     {
7286         wined3d_surface_unmap(dst_surface);
7287     }
7288     else
7289     {
7290         wined3d_surface_unmap(dst_surface);
7291         wined3d_surface_unmap(src_surface);
7292     }
7293
7294     return hr;
7295 }
7296
7297 /* Do not call while under the GL lock. */
7298 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7299         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7300 {
7301     WINEDDBLTFX BltFx;
7302
7303     memset(&BltFx, 0, sizeof(BltFx));
7304     BltFx.dwSize = sizeof(BltFx);
7305     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface->resource.format, color);
7306     return wined3d_surface_blt(dst_surface, dst_rect, NULL, NULL,
7307             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7308 }
7309
7310 /* Do not call while under the GL lock. */
7311 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7312         struct wined3d_surface *surface, const RECT *rect, float depth)
7313 {
7314     FIXME("Depth filling not implemented by cpu_blit.\n");
7315     return WINED3DERR_INVALIDCALL;
7316 }
7317
/* Callback table of the CPU blitter. The initializer is positional, so the
 * order of the entries has to match the member order of struct blit_shader,
 * which is declared outside this part of the file. cpu_blit_color_fill() and
 * cpu_blit_depth_fill() are the fill helpers defined directly above; the
 * other callbacks are defined elsewhere. */
7318 const struct blit_shader cpu_blit =  {
7319     cpu_blit_alloc,
7320     cpu_blit_free,
7321     cpu_blit_set,
7322     cpu_blit_unset,
7323     cpu_blit_supported,
7324     cpu_blit_color_fill,
7325     cpu_blit_depth_fill,
7326 };
7327
7328 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7329         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7330         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7331         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7332 {
7333     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7334     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7335     unsigned int resource_size;
7336     HRESULT hr;
7337
7338     if (multisample_quality > 0)
7339     {
7340         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7341         multisample_quality = 0;
7342     }
7343
7344     /* Quick lockable sanity check.
7345      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7346      * this function is too deep to need to care about things like this.
7347      * Levels need to be checked too, since they all affect what can be done. */
7348     switch (pool)
7349     {
7350         case WINED3DPOOL_SCRATCH:
7351             if (!lockable)
7352             {
7353                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7354                         "which are mutually exclusive, setting lockable to TRUE.\n");
7355                 lockable = TRUE;
7356             }
7357             break;
7358
7359         case WINED3DPOOL_SYSTEMMEM:
7360             if (!lockable)
7361                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7362             break;
7363
7364         case WINED3DPOOL_MANAGED:
7365             if (usage & WINED3DUSAGE_DYNAMIC)
7366                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7367             break;
7368
7369         case WINED3DPOOL_DEFAULT:
7370             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7371                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7372             break;
7373
7374         default:
7375             FIXME("Unknown pool %#x.\n", pool);
7376             break;
7377     };
7378
7379     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7380         FIXME("Trying to create a render target that isn't in the default pool.\n");
7381
7382     /* FIXME: Check that the format is supported by the device. */
7383
7384     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7385     if (!resource_size)
7386         return WINED3DERR_INVALIDCALL;
7387
7388     surface->surface_type = surface_type;
7389
7390     switch (surface_type)
7391     {
7392         case SURFACE_OPENGL:
7393             surface->surface_ops = &surface_ops;
7394             break;
7395
7396         case SURFACE_GDI:
7397             surface->surface_ops = &gdi_surface_ops;
7398             break;
7399
7400         default:
7401             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7402             return WINED3DERR_INVALIDCALL;
7403     }
7404
7405     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7406             multisample_type, multisample_quality, usage, pool, width, height, 1,
7407             resource_size, parent, parent_ops, &surface_resource_ops);
7408     if (FAILED(hr))
7409     {
7410         WARN("Failed to initialize resource, returning %#x.\n", hr);
7411         return hr;
7412     }
7413
7414     /* "Standalone" surface. */
7415     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7416
7417     surface->texture_level = level;
7418     list_init(&surface->overlays);
7419
7420     /* Flags */
7421     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7422     if (discard)
7423         surface->flags |= SFLAG_DISCARD;
7424     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7425         surface->flags |= SFLAG_LOCKABLE;
7426     /* I'm not sure if this qualifies as a hack or as an optimization. It
7427      * seems reasonable to assume that lockable render targets will get
7428      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7429      * creation. However, the other reason we want to do this is that several
7430      * ddraw applications access surface memory while the surface isn't
7431      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7432      * future locks prevents these from crashing. */
7433     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7434         surface->flags |= SFLAG_DYNLOCK;
7435
7436     /* Mark the texture as dirty so that it gets loaded first time around. */
7437     surface_add_dirty_rect(surface, NULL);
7438     list_init(&surface->renderbuffers);
7439
7440     TRACE("surface %p, memory %p, size %u\n",
7441             surface, surface->resource.allocatedMemory, surface->resource.size);
7442
7443     /* Call the private setup routine */
7444     hr = surface->surface_ops->surface_private_setup(surface);
7445     if (FAILED(hr))
7446     {
7447         ERR("Private setup failed, returning %#x\n", hr);
7448         surface->surface_ops->surface_cleanup(surface);
7449         return hr;
7450     }
7451
7452     return hr;
7453 }
7454
7455 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7456         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7457         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7458         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7459 {
7460     struct wined3d_surface *object;
7461     HRESULT hr;
7462
7463     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7464             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7465     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7466             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7467     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7468
7469     if (surface_type == SURFACE_OPENGL && !device->adapter)
7470     {
7471         ERR("OpenGL surfaces are not available without OpenGL.\n");
7472         return WINED3DERR_NOTAVAILABLE;
7473     }
7474
7475     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7476     if (!object)
7477     {
7478         ERR("Failed to allocate surface memory.\n");
7479         return WINED3DERR_OUTOFVIDEOMEMORY;
7480     }
7481
7482     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7483             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7484     if (FAILED(hr))
7485     {
7486         WARN("Failed to initialize surface, returning %#x.\n", hr);
7487         HeapFree(GetProcessHeap(), 0, object);
7488         return hr;
7489     }
7490
7491     TRACE("Created surface %p.\n", object);
7492     *surface = object;
7493
7494     return hr;
7495 }