dmloader: Simplify the module refcount handling.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT surface_cpu_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans);
41 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
42         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
43         WINED3DTEXTUREFILTERTYPE filter);
44
45 static void surface_cleanup(struct wined3d_surface *surface)
46 {
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO) || !list_empty(&surface->renderbuffers))
50     {
51         struct wined3d_renderbuffer_entry *entry, *entry2;
52         const struct wined3d_gl_info *gl_info;
53         struct wined3d_context *context;
54
55         context = context_acquire(surface->resource.device, NULL);
56         gl_info = context->gl_info;
57
58         ENTER_GL();
59
60         if (surface->texture_name)
61         {
62             TRACE("Deleting texture %u.\n", surface->texture_name);
63             glDeleteTextures(1, &surface->texture_name);
64         }
65
66         if (surface->flags & SFLAG_PBO)
67         {
68             TRACE("Deleting PBO %u.\n", surface->pbo);
69             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
70         }
71
72         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
73         {
74             TRACE("Deleting renderbuffer %u.\n", entry->id);
75             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
76             HeapFree(GetProcessHeap(), 0, entry);
77         }
78
79         LEAVE_GL();
80
81         context_release(context);
82     }
83
84     if (surface->flags & SFLAG_DIBSECTION)
85     {
86         /* Release the DC. */
87         SelectObject(surface->hDC, surface->dib.holdbitmap);
88         DeleteDC(surface->hDC);
89         /* Release the DIB section. */
90         DeleteObject(surface->dib.DIBsection);
91         surface->dib.bitmap_data = NULL;
92         surface->resource.allocatedMemory = NULL;
93     }
94
95     if (surface->flags & SFLAG_USERPTR)
96         wined3d_surface_set_mem(surface, NULL);
97     if (surface->overlay_dest)
98         list_remove(&surface->overlay_entry);
99
100     HeapFree(GetProcessHeap(), 0, surface->palette9);
101
102     resource_cleanup(&surface->resource);
103 }
104
105 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
106 {
107     TRACE("surface %p, container %p.\n", surface, container);
108
109     if (!container && type != WINED3D_CONTAINER_NONE)
110         ERR("Setting NULL container of type %#x.\n", type);
111
112     if (type == WINED3D_CONTAINER_SWAPCHAIN)
113     {
114         surface->get_drawable_size = get_drawable_size_swapchain;
115     }
116     else
117     {
118         switch (wined3d_settings.offscreen_rendering_mode)
119         {
120             case ORM_FBO:
121                 surface->get_drawable_size = get_drawable_size_fbo;
122                 break;
123
124             case ORM_BACKBUFFER:
125                 surface->get_drawable_size = get_drawable_size_backbuffer;
126                 break;
127
128             default:
129                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
130                 return;
131         }
132     }
133
134     surface->container.type = type;
135     surface->container.u.base = container;
136 }
137
138 struct blt_info
139 {
140     GLenum binding;
141     GLenum bind_target;
142     enum tex_types tex_type;
143     GLfloat coords[4][3];
144 };
145
/* Rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
153
154 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
155 {
156     f->l = ((r->left * 2.0f) / w) - 1.0f;
157     f->t = ((r->top * 2.0f) / h) - 1.0f;
158     f->r = ((r->right * 2.0f) / w) - 1.0f;
159     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
160 }
161
162 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
163 {
164     GLfloat (*coords)[3] = info->coords;
165     struct float_rect f;
166
167     switch (target)
168     {
169         default:
170             FIXME("Unsupported texture target %#x\n", target);
171             /* Fall back to GL_TEXTURE_2D */
172         case GL_TEXTURE_2D:
173             info->binding = GL_TEXTURE_BINDING_2D;
174             info->bind_target = GL_TEXTURE_2D;
175             info->tex_type = tex_2d;
176             coords[0][0] = (float)rect->left / w;
177             coords[0][1] = (float)rect->top / h;
178             coords[0][2] = 0.0f;
179
180             coords[1][0] = (float)rect->right / w;
181             coords[1][1] = (float)rect->top / h;
182             coords[1][2] = 0.0f;
183
184             coords[2][0] = (float)rect->left / w;
185             coords[2][1] = (float)rect->bottom / h;
186             coords[2][2] = 0.0f;
187
188             coords[3][0] = (float)rect->right / w;
189             coords[3][1] = (float)rect->bottom / h;
190             coords[3][2] = 0.0f;
191             break;
192
193         case GL_TEXTURE_RECTANGLE_ARB:
194             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
195             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
196             info->tex_type = tex_rect;
197             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
198             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
199             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
200             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
201             break;
202
203         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
204             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
205             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
206             info->tex_type = tex_cube;
207             cube_coords_float(rect, w, h, &f);
208
209             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
210             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
211             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
212             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
213             break;
214
215         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
216             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
217             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
218             info->tex_type = tex_cube;
219             cube_coords_float(rect, w, h, &f);
220
221             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
222             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
223             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
224             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
225             break;
226
227         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
228             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
229             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
230             info->tex_type = tex_cube;
231             cube_coords_float(rect, w, h, &f);
232
233             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
234             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
235             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
236             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
237             break;
238
239         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
240             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
241             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
242             info->tex_type = tex_cube;
243             cube_coords_float(rect, w, h, &f);
244
245             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
246             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
247             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
248             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
249             break;
250
251         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
252             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
253             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
254             info->tex_type = tex_cube;
255             cube_coords_float(rect, w, h, &f);
256
257             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
258             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
259             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
260             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
261             break;
262
263         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
264             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
265             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
266             info->tex_type = tex_cube;
267             cube_coords_float(rect, w, h, &f);
268
269             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
270             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
271             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
272             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
273             break;
274     }
275 }
276
277 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
278 {
279     if (rect_in)
280         *rect_out = *rect_in;
281     else
282     {
283         rect_out->left = 0;
284         rect_out->top = 0;
285         rect_out->right = surface->resource.width;
286         rect_out->bottom = surface->resource.height;
287     }
288 }
289
290 /* GL locking and context activation is done by the caller */
291 void draw_textured_quad(const struct wined3d_surface *src_surface, const RECT *src_rect,
292         const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
293 {
294     struct blt_info info;
295
296     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
297
298     glEnable(info.bind_target);
299     checkGLcall("glEnable(bind_target)");
300
301     /* Bind the texture */
302     glBindTexture(info.bind_target, src_surface->texture_name);
303     checkGLcall("glBindTexture");
304
305     /* Filtering for StretchRect */
306     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
307             wined3d_gl_mag_filter(magLookup, Filter));
308     checkGLcall("glTexParameteri");
309     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
310             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
311     checkGLcall("glTexParameteri");
312     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
313     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
314     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
315     checkGLcall("glTexEnvi");
316
317     /* Draw a quad */
318     glBegin(GL_TRIANGLE_STRIP);
319     glTexCoord3fv(info.coords[0]);
320     glVertex2i(dst_rect->left, dst_rect->top);
321
322     glTexCoord3fv(info.coords[1]);
323     glVertex2i(dst_rect->right, dst_rect->top);
324
325     glTexCoord3fv(info.coords[2]);
326     glVertex2i(dst_rect->left, dst_rect->bottom);
327
328     glTexCoord3fv(info.coords[3]);
329     glVertex2i(dst_rect->right, dst_rect->bottom);
330     glEnd();
331
332     /* Unbind the texture */
333     glBindTexture(info.bind_target, 0);
334     checkGLcall("glBindTexture(info->bind_target, 0)");
335
336     /* We changed the filtering settings on the texture. Inform the
337      * container about this to get the filters reset properly next draw. */
338     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
339     {
340         struct wined3d_texture *texture = src_surface->container.u.texture;
341         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
342         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
343         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
344     }
345 }
346
347 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
348 {
349     const struct wined3d_format *format = surface->resource.format;
350     SYSTEM_INFO sysInfo;
351     BITMAPINFO *b_info;
352     int extraline = 0;
353     DWORD *masks;
354     UINT usage;
355     HDC dc;
356
357     TRACE("surface %p.\n", surface);
358
359     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
360     {
361         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
362         return WINED3DERR_INVALIDCALL;
363     }
364
365     switch (format->byte_count)
366     {
367         case 2:
368         case 4:
369             /* Allocate extra space to store the RGB bit masks. */
370             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
371             break;
372
373         case 3:
374             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
375             break;
376
377         default:
378             /* Allocate extra space for a palette. */
379             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
380                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
381             break;
382     }
383
384     if (!b_info)
385         return E_OUTOFMEMORY;
386
387     /* Some applications access the surface in via DWORDs, and do not take
388      * the necessary care at the end of the surface. So we need at least
389      * 4 extra bytes at the end of the surface. Check against the page size,
390      * if the last page used for the surface has at least 4 spare bytes we're
391      * safe, otherwise add an extra line to the DIB section. */
392     GetSystemInfo(&sysInfo);
393     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
394     {
395         extraline = 1;
396         TRACE("Adding an extra line to the DIB section.\n");
397     }
398
399     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
400     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
401     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
402     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
403     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
404             * wined3d_surface_get_pitch(surface);
405     b_info->bmiHeader.biPlanes = 1;
406     b_info->bmiHeader.biBitCount = format->byte_count * 8;
407
408     b_info->bmiHeader.biXPelsPerMeter = 0;
409     b_info->bmiHeader.biYPelsPerMeter = 0;
410     b_info->bmiHeader.biClrUsed = 0;
411     b_info->bmiHeader.biClrImportant = 0;
412
413     /* Get the bit masks */
414     masks = (DWORD *)b_info->bmiColors;
415     switch (surface->resource.format->id)
416     {
417         case WINED3DFMT_B8G8R8_UNORM:
418             usage = DIB_RGB_COLORS;
419             b_info->bmiHeader.biCompression = BI_RGB;
420             break;
421
422         case WINED3DFMT_B5G5R5X1_UNORM:
423         case WINED3DFMT_B5G5R5A1_UNORM:
424         case WINED3DFMT_B4G4R4A4_UNORM:
425         case WINED3DFMT_B4G4R4X4_UNORM:
426         case WINED3DFMT_B2G3R3_UNORM:
427         case WINED3DFMT_B2G3R3A8_UNORM:
428         case WINED3DFMT_R10G10B10A2_UNORM:
429         case WINED3DFMT_R8G8B8A8_UNORM:
430         case WINED3DFMT_R8G8B8X8_UNORM:
431         case WINED3DFMT_B10G10R10A2_UNORM:
432         case WINED3DFMT_B5G6R5_UNORM:
433         case WINED3DFMT_R16G16B16A16_UNORM:
434             usage = 0;
435             b_info->bmiHeader.biCompression = BI_BITFIELDS;
436             masks[0] = format->red_mask;
437             masks[1] = format->green_mask;
438             masks[2] = format->blue_mask;
439             break;
440
441         default:
442             /* Don't know palette */
443             b_info->bmiHeader.biCompression = BI_RGB;
444             usage = 0;
445             break;
446     }
447
448     if (!(dc = GetDC(0)))
449     {
450         HeapFree(GetProcessHeap(), 0, b_info);
451         return HRESULT_FROM_WIN32(GetLastError());
452     }
453
454     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
455             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
456             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
457     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
458     ReleaseDC(0, dc);
459
460     if (!surface->dib.DIBsection)
461     {
462         ERR("Failed to create DIB section.\n");
463         HeapFree(GetProcessHeap(), 0, b_info);
464         return HRESULT_FROM_WIN32(GetLastError());
465     }
466
467     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
468     /* Copy the existing surface to the dib section. */
469     if (surface->resource.allocatedMemory)
470     {
471         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
472                 surface->resource.height * wined3d_surface_get_pitch(surface));
473     }
474     else
475     {
476         /* This is to make maps read the GL texture although memory is allocated. */
477         surface->flags &= ~SFLAG_INSYSMEM;
478     }
479     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
480
481     HeapFree(GetProcessHeap(), 0, b_info);
482
483     /* Now allocate a DC. */
484     surface->hDC = CreateCompatibleDC(0);
485     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
486     TRACE("Using wined3d palette %p.\n", surface->palette);
487     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
488
489     surface->flags |= SFLAG_DIBSECTION;
490
491     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
492     surface->resource.heapMemory = NULL;
493
494     return WINED3D_OK;
495 }
496
497 static void surface_prepare_system_memory(struct wined3d_surface *surface)
498 {
499     struct wined3d_device *device = surface->resource.device;
500     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
501
502     TRACE("surface %p.\n", surface);
503
504     /* Performance optimization: Count how often a surface is locked, if it is
505      * locked regularly do not throw away the system memory copy. This avoids
506      * the need to download the surface from OpenGL all the time. The surface
507      * is still downloaded if the OpenGL texture is changed. */
508     if (!(surface->flags & SFLAG_DYNLOCK))
509     {
510         if (++surface->lockCount > MAXLOCKCOUNT)
511         {
512             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
513             surface->flags |= SFLAG_DYNLOCK;
514         }
515     }
516
517     /* Create a PBO for dynamically locked surfaces but don't do it for
518      * converted or NPOT surfaces. Also don't create a PBO for systemmem
519      * surfaces. */
520     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
521             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
522             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
523     {
524         struct wined3d_context *context;
525         GLenum error;
526
527         context = context_acquire(device, NULL);
528         ENTER_GL();
529
530         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
531         error = glGetError();
532         if (!surface->pbo || error != GL_NO_ERROR)
533             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
534
535         TRACE("Binding PBO %u.\n", surface->pbo);
536
537         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
538         checkGLcall("glBindBufferARB");
539
540         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
541                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
542         checkGLcall("glBufferDataARB");
543
544         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
545         checkGLcall("glBindBufferARB");
546
547         /* We don't need the system memory anymore and we can't even use it for PBOs. */
548         if (!(surface->flags & SFLAG_CLIENT))
549         {
550             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
551             surface->resource.heapMemory = NULL;
552         }
553         surface->resource.allocatedMemory = NULL;
554         surface->flags |= SFLAG_PBO;
555         LEAVE_GL();
556         context_release(context);
557     }
558     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
559     {
560         /* Whatever surface we have, make sure that there is memory allocated
561          * for the downloaded copy, or a PBO to map. */
562         if (!surface->resource.heapMemory)
563             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
564
565         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
566                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
567
568         if (surface->flags & SFLAG_INSYSMEM)
569             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
570     }
571 }
572
573 static void surface_evict_sysmem(struct wined3d_surface *surface)
574 {
575     if (surface->flags & SFLAG_DONOTFREE)
576         return;
577
578     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
579     surface->resource.allocatedMemory = NULL;
580     surface->resource.heapMemory = NULL;
581     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
582 }
583
584 /* Context activation is done by the caller. */
585 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
586         const struct wined3d_gl_info *gl_info, BOOL srgb)
587 {
588     struct wined3d_device *device = surface->resource.device;
589     DWORD active_sampler;
590     GLint active_texture;
591
592     /* We don't need a specific texture unit, but after binding the texture
593      * the current unit is dirty. Read the unit back instead of switching to
594      * 0, this avoids messing around with the state manager's GL states. The
595      * current texture unit should always be a valid one.
596      *
597      * To be more specific, this is tricky because we can implicitly be
598      * called from sampler() in state.c. This means we can't touch anything
599      * other than whatever happens to be the currently active texture, or we
600      * would risk marking already applied sampler states dirty again.
601      *
602      * TODO: Track the current active texture per GL context instead of using
603      * glGet(). */
604
605     ENTER_GL();
606     glGetIntegerv(GL_ACTIVE_TEXTURE, &active_texture);
607     LEAVE_GL();
608     active_sampler = device->rev_tex_unit_map[active_texture - GL_TEXTURE0_ARB];
609
610     if (active_sampler != WINED3D_UNMAPPED_STAGE)
611         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
612     surface_bind(surface, gl_info, srgb);
613 }
614
615 static void surface_force_reload(struct wined3d_surface *surface)
616 {
617     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
618 }
619
620 static void surface_release_client_storage(struct wined3d_surface *surface)
621 {
622     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
623
624     ENTER_GL();
625     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
626     if (surface->texture_name)
627     {
628         surface_bind_and_dirtify(surface, context->gl_info, FALSE);
629         glTexImage2D(surface->texture_target, surface->texture_level,
630                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
631     }
632     if (surface->texture_name_srgb)
633     {
634         surface_bind_and_dirtify(surface, context->gl_info, TRUE);
635         glTexImage2D(surface->texture_target, surface->texture_level,
636                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
637     }
638     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
639     LEAVE_GL();
640
641     context_release(context);
642
643     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
644     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
645     surface_force_reload(surface);
646 }
647
648 static HRESULT surface_private_setup(struct wined3d_surface *surface)
649 {
650     /* TODO: Check against the maximum texture sizes supported by the video card. */
651     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
652     unsigned int pow2Width, pow2Height;
653
654     TRACE("surface %p.\n", surface);
655
656     surface->texture_name = 0;
657     surface->texture_target = GL_TEXTURE_2D;
658
659     /* Non-power2 support */
660     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
661     {
662         pow2Width = surface->resource.width;
663         pow2Height = surface->resource.height;
664     }
665     else
666     {
667         /* Find the nearest pow2 match */
668         pow2Width = pow2Height = 1;
669         while (pow2Width < surface->resource.width)
670             pow2Width <<= 1;
671         while (pow2Height < surface->resource.height)
672             pow2Height <<= 1;
673     }
674     surface->pow2Width = pow2Width;
675     surface->pow2Height = pow2Height;
676
677     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
678     {
679         /* TODO: Add support for non power two compressed textures. */
680         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
681         {
682             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
683                   surface, surface->resource.width, surface->resource.height);
684             return WINED3DERR_NOTAVAILABLE;
685         }
686     }
687
688     if (pow2Width != surface->resource.width
689             || pow2Height != surface->resource.height)
690     {
691         surface->flags |= SFLAG_NONPOW2;
692     }
693
694     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
695             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
696     {
697         /* One of three options:
698          * 1: Do the same as we do with NPOT and scale the texture, (any
699          *    texture ops would require the texture to be scaled which is
700          *    potentially slow)
701          * 2: Set the texture to the maximum size (bad idea).
702          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
703          * 4: Create the surface, but allow it to be used only for DirectDraw
704          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
705          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
706          *    the render target. */
707         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
708         {
709             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
710             return WINED3DERR_NOTAVAILABLE;
711         }
712
713         /* We should never use this surface in combination with OpenGL! */
714         TRACE("Creating an oversized surface: %ux%u.\n",
715                 surface->pow2Width, surface->pow2Height);
716     }
717     else
718     {
719         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
720          * and EXT_PALETTED_TEXTURE is used in combination with texture
721          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
722          * EXT_PALETTED_TEXTURE doesn't work in combination with
723          * ARB_TEXTURE_RECTANGLE. */
724         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
725                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
726                 && gl_info->supported[EXT_PALETTED_TEXTURE]
727                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
728         {
729             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
730             surface->pow2Width = surface->resource.width;
731             surface->pow2Height = surface->resource.height;
732             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
733         }
734     }
735
736     switch (wined3d_settings.offscreen_rendering_mode)
737     {
738         case ORM_FBO:
739             surface->get_drawable_size = get_drawable_size_fbo;
740             break;
741
742         case ORM_BACKBUFFER:
743             surface->get_drawable_size = get_drawable_size_backbuffer;
744             break;
745
746         default:
747             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
748             return WINED3DERR_INVALIDCALL;
749     }
750
751     surface->flags |= SFLAG_INSYSMEM;
752
753     return WINED3D_OK;
754 }
755
756 static void surface_realize_palette(struct wined3d_surface *surface)
757 {
758     struct wined3d_palette *palette = surface->palette;
759
760     TRACE("surface %p.\n", surface);
761
762     if (!palette) return;
763
764     if (surface->resource.format->id == WINED3DFMT_P8_UINT
765             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
766     {
767         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
768         {
769             /* Make sure the texture is up to date. This call doesn't do
770              * anything if the texture is already up to date. */
771             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
772
773             /* We want to force a palette refresh, so mark the drawable as not being up to date */
774             if (!surface_is_offscreen(surface))
775                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
776         }
777         else
778         {
779             if (!(surface->flags & SFLAG_INSYSMEM))
780             {
781                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
782                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
783             }
784             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
785         }
786     }
787
788     if (surface->flags & SFLAG_DIBSECTION)
789     {
790         RGBQUAD col[256];
791         unsigned int i;
792
793         TRACE("Updating the DC's palette.\n");
794
795         for (i = 0; i < 256; ++i)
796         {
797             col[i].rgbRed   = palette->palents[i].peRed;
798             col[i].rgbGreen = palette->palents[i].peGreen;
799             col[i].rgbBlue  = palette->palents[i].peBlue;
800             col[i].rgbReserved = 0;
801         }
802         SetDIBColorTable(surface->hDC, 0, 256, col);
803     }
804
805     /* Propagate the changes to the drawable when we have a palette. */
806     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
807         surface_load_location(surface, SFLAG_INDRAWABLE, NULL);
808 }
809
810 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
811 {
812     HRESULT hr;
813
814     /* If there's no destination surface there is nothing to do. */
815     if (!surface->overlay_dest)
816         return WINED3D_OK;
817
818     /* Blt calls ModifyLocation on the dest surface, which in turn calls
819      * DrawOverlay to update the overlay. Prevent an endless recursion. */
820     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
821         return WINED3D_OK;
822
823     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
824     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
825             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
826     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
827
828     return hr;
829 }
830
831 static void surface_preload(struct wined3d_surface *surface)
832 {
833     TRACE("surface %p.\n", surface);
834
835     surface_internal_preload(surface, SRGB_ANY);
836 }
837
838 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
839 {
840     struct wined3d_device *device = surface->resource.device;
841     const RECT *pass_rect = rect;
842
843     TRACE("surface %p, rect %s, flags %#x.\n",
844             surface, wine_dbgstr_rect(rect), flags);
845
846     if (flags & WINED3DLOCK_DISCARD)
847     {
848         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
849         surface_prepare_system_memory(surface);
850         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
851     }
852     else
853     {
854         /* surface_load_location() does not check if the rectangle specifies
855          * the full surface. Most callers don't need that, so do it here. */
856         if (rect && !rect->top && !rect->left
857                 && rect->right == surface->resource.width
858                 && rect->bottom == surface->resource.height)
859             pass_rect = NULL;
860
861         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
862                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
863                 || surface == device->fb.render_targets[0])))
864             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
865     }
866
867     if (surface->flags & SFLAG_PBO)
868     {
869         const struct wined3d_gl_info *gl_info;
870         struct wined3d_context *context;
871
872         context = context_acquire(device, NULL);
873         gl_info = context->gl_info;
874
875         ENTER_GL();
876         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
877         checkGLcall("glBindBufferARB");
878
879         /* This shouldn't happen but could occur if some other function
880          * didn't handle the PBO properly. */
881         if (surface->resource.allocatedMemory)
882             ERR("The surface already has PBO memory allocated.\n");
883
884         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
885         checkGLcall("glMapBufferARB");
886
887         /* Make sure the PBO isn't set anymore in order not to break non-PBO
888          * calls. */
889         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
890         checkGLcall("glBindBufferARB");
891
892         LEAVE_GL();
893         context_release(context);
894     }
895
896     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
897     {
898         if (!rect)
899             surface_add_dirty_rect(surface, NULL);
900         else
901         {
902             WINED3DBOX b;
903
904             b.Left = rect->left;
905             b.Top = rect->top;
906             b.Right = rect->right;
907             b.Bottom = rect->bottom;
908             b.Front = 0;
909             b.Back = 1;
910             surface_add_dirty_rect(surface, &b);
911         }
912     }
913 }
914
915 static void surface_unmap(struct wined3d_surface *surface)
916 {
917     struct wined3d_device *device = surface->resource.device;
918     BOOL fullsurface;
919
920     TRACE("surface %p.\n", surface);
921
922     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
923
924     if (surface->flags & SFLAG_PBO)
925     {
926         const struct wined3d_gl_info *gl_info;
927         struct wined3d_context *context;
928
929         TRACE("Freeing PBO memory.\n");
930
931         context = context_acquire(device, NULL);
932         gl_info = context->gl_info;
933
934         ENTER_GL();
935         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
936         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
937         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
938         checkGLcall("glUnmapBufferARB");
939         LEAVE_GL();
940         context_release(context);
941
942         surface->resource.allocatedMemory = NULL;
943     }
944
945     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
946
947     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
948     {
949         TRACE("Not dirtified, nothing to do.\n");
950         goto done;
951     }
952
953     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
954             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
955     {
956         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
957         {
958             static BOOL warned = FALSE;
959             if (!warned)
960             {
961                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
962                 warned = TRUE;
963             }
964             goto done;
965         }
966
967         if (!surface->dirtyRect.left && !surface->dirtyRect.top
968                 && surface->dirtyRect.right == surface->resource.width
969                 && surface->dirtyRect.bottom == surface->resource.height)
970         {
971             fullsurface = TRUE;
972         }
973         else
974         {
975             /* TODO: Proper partial rectangle tracking. */
976             fullsurface = FALSE;
977             surface->flags |= SFLAG_INSYSMEM;
978         }
979
980         surface_load_location(surface, SFLAG_INDRAWABLE, fullsurface ? NULL : &surface->dirtyRect);
981
982         /* Partial rectangle tracking is not commonly implemented, it is only
983          * done for render targets. INSYSMEM was set before to tell
984          * surface_load_location() where to read the rectangle from.
985          * Indrawable is set because all modifications from the partial
986          * sysmem copy are written back to the drawable, thus the surface is
987          * merged again in the drawable. The sysmem copy is not fully up to
988          * date because only a subrectangle was read in Map(). */
989         if (!fullsurface)
990         {
991             surface_modify_location(surface, SFLAG_INDRAWABLE, TRUE);
992             surface_evict_sysmem(surface);
993         }
994
995         surface->dirtyRect.left = surface->resource.width;
996         surface->dirtyRect.top = surface->resource.height;
997         surface->dirtyRect.right = 0;
998         surface->dirtyRect.bottom = 0;
999     }
1000     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1001     {
1002         FIXME("Depth / stencil buffer locking is not implemented.\n");
1003     }
1004
1005 done:
1006     /* Overlays have to be redrawn manually after changes with the GL implementation */
1007     if (surface->overlay_dest)
1008         surface->surface_ops->surface_draw_overlay(surface);
1009 }
1010
1011 static HRESULT surface_getdc(struct wined3d_surface *surface)
1012 {
1013     WINED3DLOCKED_RECT lock;
1014     HRESULT hr;
1015
1016     TRACE("surface %p.\n", surface);
1017
1018     /* Create a DIB section if there isn't a dc yet. */
1019     if (!surface->hDC)
1020     {
1021         if (surface->flags & SFLAG_CLIENT)
1022         {
1023             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1024             surface_release_client_storage(surface);
1025         }
1026         hr = surface_create_dib_section(surface);
1027         if (FAILED(hr))
1028             return WINED3DERR_INVALIDCALL;
1029
1030         /* Use the DIB section from now on if we are not using a PBO. */
1031         if (!(surface->flags & SFLAG_PBO))
1032             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1033     }
1034
1035     /* Map the surface. */
1036     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1037     if (FAILED(hr))
1038         ERR("Map failed, hr %#x.\n", hr);
1039
1040     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1041      * activates the allocatedMemory. */
1042     if (surface->flags & SFLAG_PBO)
1043         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->dib.bitmap_size);
1044
1045     return hr;
1046 }
1047
1048 static HRESULT surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1049 {
1050     TRACE("surface %p, override %p.\n", surface, override);
1051
1052     /* Flipping is only supported on render targets and overlays. */
1053     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
1054     {
1055         WARN("Tried to flip a non-render target, non-overlay surface.\n");
1056         return WINEDDERR_NOTFLIPPABLE;
1057     }
1058
1059     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1060     {
1061         flip_surface(surface, override);
1062
1063         /* Update the overlay if it is visible */
1064         if (surface->overlay_dest)
1065             return surface->surface_ops->surface_draw_overlay(surface);
1066         else
1067             return WINED3D_OK;
1068     }
1069
1070     return WINED3D_OK;
1071 }
1072
1073 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1074 {
1075     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1076         return FALSE;
1077     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1078         return FALSE;
1079     return TRUE;
1080 }
1081
1082 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1083         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1084 {
1085     const struct wined3d_gl_info *gl_info;
1086     struct wined3d_context *context;
1087     DWORD src_mask, dst_mask;
1088     GLbitfield gl_mask;
1089
1090     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1091             device, src_surface, wine_dbgstr_rect(src_rect),
1092             dst_surface, wine_dbgstr_rect(dst_rect));
1093
1094     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1095     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1096
1097     if (src_mask != dst_mask)
1098     {
1099         ERR("Incompatible formats %s and %s.\n",
1100                 debug_d3dformat(src_surface->resource.format->id),
1101                 debug_d3dformat(dst_surface->resource.format->id));
1102         return;
1103     }
1104
1105     if (!src_mask)
1106     {
1107         ERR("Not a depth / stencil format: %s.\n",
1108                 debug_d3dformat(src_surface->resource.format->id));
1109         return;
1110     }
1111
1112     gl_mask = 0;
1113     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1114         gl_mask |= GL_DEPTH_BUFFER_BIT;
1115     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1116         gl_mask |= GL_STENCIL_BUFFER_BIT;
1117
1118     /* Make sure the locations are up-to-date. Loading the destination
1119      * surface isn't required if the entire surface is overwritten. */
1120     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1121     if (!surface_is_full_rect(dst_surface, dst_rect))
1122         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1123
1124     context = context_acquire(device, NULL);
1125     if (!context->valid)
1126     {
1127         context_release(context);
1128         WARN("Invalid context, skipping blit.\n");
1129         return;
1130     }
1131
1132     gl_info = context->gl_info;
1133
1134     ENTER_GL();
1135
1136     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1137     glReadBuffer(GL_NONE);
1138     checkGLcall("glReadBuffer()");
1139     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1140
1141     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1142     context_set_draw_buffer(context, GL_NONE);
1143     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1144
1145     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1146     {
1147         glDepthMask(GL_TRUE);
1148         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1149     }
1150     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1151     {
1152         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1153         {
1154             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1155             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1156         }
1157         glStencilMask(~0U);
1158         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1159     }
1160
1161     glDisable(GL_SCISSOR_TEST);
1162     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1163
1164     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1165             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1166     checkGLcall("glBlitFramebuffer()");
1167
1168     LEAVE_GL();
1169
1170     if (wined3d_settings.strict_draw_ordering)
1171         wglFlush(); /* Flush to ensure ordering across contexts. */
1172
1173     context_release(context);
1174 }
1175
1176 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1177         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1178         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1179 {
1180     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1181         return FALSE;
1182
1183     /* Source and/or destination need to be on the GL side */
1184     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1185         return FALSE;
1186
1187     switch (blit_op)
1188     {
1189         case WINED3D_BLIT_OP_COLOR_BLIT:
1190             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1191                 return FALSE;
1192             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1193                 return FALSE;
1194             break;
1195
1196         case WINED3D_BLIT_OP_DEPTH_BLIT:
1197             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1198                 return FALSE;
1199             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1200                 return FALSE;
1201             break;
1202
1203         default:
1204             return FALSE;
1205     }
1206
1207     if (!(src_format->id == dst_format->id
1208             || (is_identity_fixup(src_format->color_fixup)
1209             && is_identity_fixup(dst_format->color_fixup))))
1210         return FALSE;
1211
1212     return TRUE;
1213 }
1214
1215 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1216 {
1217     const struct wined3d_format *format = surface->resource.format;
1218
1219     switch (format->id)
1220     {
1221         case WINED3DFMT_S1_UINT_D15_UNORM:
1222             *float_depth = depth / (float)0x00007fff;
1223             break;
1224
1225         case WINED3DFMT_D16_UNORM:
1226             *float_depth = depth / (float)0x0000ffff;
1227             break;
1228
1229         case WINED3DFMT_D24_UNORM_S8_UINT:
1230         case WINED3DFMT_X8D24_UNORM:
1231             *float_depth = depth / (float)0x00ffffff;
1232             break;
1233
1234         case WINED3DFMT_D32_UNORM:
1235             *float_depth = depth / (float)0xffffffff;
1236             break;
1237
1238         default:
1239             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1240             return FALSE;
1241     }
1242
1243     return TRUE;
1244 }
1245
1246 /* Do not call while under the GL lock. */
1247 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1248 {
1249     const struct wined3d_resource *resource = &surface->resource;
1250     struct wined3d_device *device = resource->device;
1251     const struct blit_shader *blitter;
1252
1253     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1254             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1255     if (!blitter)
1256     {
1257         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1258         return WINED3DERR_INVALIDCALL;
1259     }
1260
1261     return blitter->depth_fill(device, surface, rect, depth);
1262 }
1263
1264 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1265         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1266 {
1267     struct wined3d_device *device = src_surface->resource.device;
1268
1269     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1270             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1271             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1272         return WINED3DERR_INVALIDCALL;
1273
1274     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1275
1276     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1277             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1278     surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
1279
1280     return WINED3D_OK;
1281 }
1282
1283 /* Do not call while under the GL lock. */
1284 static HRESULT surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
1285         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
1286         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1287 {
1288     struct wined3d_device *device = dst_surface->resource.device;
1289     DWORD src_ds_flags, dst_ds_flags;
1290     static const DWORD simple_blit = WINEDDBLT_ASYNC
1291             | WINEDDBLT_COLORFILL
1292             | WINEDDBLT_WAIT
1293             | WINEDDBLT_DEPTHFILL
1294             | WINEDDBLT_DONOTWAIT;
1295
1296     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1297             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
1298             flags, fx, debug_d3dtexturefiltertype(filter));
1299     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1300
1301     if (flags & ~simple_blit)
1302     {
1303         WARN("Using fallback for complex blit (%#x).\n", flags);
1304         goto fallback;
1305     }
1306
1307     if (!device->d3d_initialized)
1308     {
1309         WARN("D3D not initialized, using fallback.\n");
1310         goto fallback;
1311     }
1312
1313     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1314     if (src_surface)
1315         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1316     else
1317         src_ds_flags = 0;
1318
1319     if (src_ds_flags || dst_ds_flags)
1320     {
1321         if (flags & WINEDDBLT_DEPTHFILL)
1322         {
1323             float depth;
1324
1325             TRACE("Depth fill.\n");
1326
1327             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1328                 return WINED3DERR_INVALIDCALL;
1329
1330             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, dst_rect, depth)))
1331                 return WINED3D_OK;
1332         }
1333         else
1334         {
1335             /* Accessing depth / stencil surfaces is supposed to fail while in
1336              * a scene, except for fills, which seem to work. */
1337             if (device->inScene)
1338             {
1339                 WARN("Rejecting depth / stencil access while in scene.\n");
1340                 return WINED3DERR_INVALIDCALL;
1341             }
1342
1343             if (src_ds_flags != dst_ds_flags)
1344             {
1345                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1346                 return WINED3DERR_INVALIDCALL;
1347             }
1348
1349             if (src_rect->top || src_rect->left
1350                     || src_rect->bottom != src_surface->resource.height
1351                     || src_rect->right != src_surface->resource.width)
1352             {
1353                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1354                         wine_dbgstr_rect(src_rect));
1355                 return WINED3DERR_INVALIDCALL;
1356             }
1357
1358             if (dst_rect->top || dst_rect->left
1359                     || dst_rect->bottom != dst_surface->resource.height
1360                     || dst_rect->right != dst_surface->resource.width)
1361             {
1362                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1363                         wine_dbgstr_rect(src_rect));
1364                 return WINED3DERR_INVALIDCALL;
1365             }
1366
1367             if (src_surface->resource.height != dst_surface->resource.height
1368                     || src_surface->resource.width != dst_surface->resource.width)
1369             {
1370                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1371                 return WINED3DERR_INVALIDCALL;
1372             }
1373
1374             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, src_rect, dst_surface, dst_rect)))
1375                 return WINED3D_OK;
1376         }
1377     }
1378
1379 fallback:
1380
1381     /* Special cases for render targets. */
1382     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1383             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1384     {
1385         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, dst_rect,
1386                 src_surface, src_rect, flags, fx, filter)))
1387             return WINED3D_OK;
1388     }
1389
1390     /* For the rest call the X11 surface implementation. For render targets
1391      * this should be implemented OpenGL accelerated in BltOverride, other
1392      * blits are rather rare. */
1393     return surface_cpu_blt(dst_surface, dst_rect, src_surface, src_rect, flags, fx, filter);
1394 }
1395
1396 /* Do not call while under the GL lock. */
1397 static HRESULT surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1398         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1399 {
1400     struct wined3d_device *device = dst_surface->resource.device;
1401
1402     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
1403             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1404
1405     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface->flags & SFLAG_LOCKED))
1406     {
1407         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1408         return WINEDDERR_SURFACEBUSY;
1409     }
1410
1411     if (device->inScene && (dst_surface == device->fb.depth_stencil || src_surface == device->fb.depth_stencil))
1412     {
1413         WARN("Attempt to access the depth / stencil surface while in a scene.\n");
1414         return WINED3DERR_INVALIDCALL;
1415     }
1416
1417     /* Special cases for RenderTargets */
1418     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1419             || (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
1420     {
1421
1422         RECT src_rect, dst_rect;
1423         DWORD flags = 0;
1424
1425         surface_get_rect(src_surface, src_rect_in, &src_rect);
1426
1427         dst_rect.left = dst_x;
1428         dst_rect.top = dst_y;
1429         dst_rect.right = dst_x + src_rect.right - src_rect.left;
1430         dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1431
1432         /* Convert BltFast flags into Blt ones because BltOverride is called
1433          * from Blt as well. */
1434         if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1435             flags |= WINEDDBLT_KEYSRC;
1436         if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1437             flags |= WINEDDBLT_KEYDEST;
1438         if (trans & WINEDDBLTFAST_WAIT)
1439             flags |= WINEDDBLT_WAIT;
1440         if (trans & WINEDDBLTFAST_DONOTWAIT)
1441             flags |= WINEDDBLT_DONOTWAIT;
1442
1443         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface,
1444                 &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT)))
1445             return WINED3D_OK;
1446     }
1447
1448     return surface_cpu_bltfast(dst_surface, dst_x, dst_y, src_surface, src_rect_in, trans);
1449 }
1450
1451 static HRESULT surface_set_mem(struct wined3d_surface *surface, void *mem)
1452 {
1453     TRACE("surface %p, mem %p.\n", surface, mem);
1454
1455     if (mem && mem != surface->resource.allocatedMemory)
1456     {
1457         void *release = NULL;
1458
1459         /* Do I have to copy the old surface content? */
1460         if (surface->flags & SFLAG_DIBSECTION)
1461         {
1462             SelectObject(surface->hDC, surface->dib.holdbitmap);
1463             DeleteDC(surface->hDC);
1464             /* Release the DIB section. */
1465             DeleteObject(surface->dib.DIBsection);
1466             surface->dib.bitmap_data = NULL;
1467             surface->resource.allocatedMemory = NULL;
1468             surface->hDC = NULL;
1469             surface->flags &= ~SFLAG_DIBSECTION;
1470         }
1471         else if (!(surface->flags & SFLAG_USERPTR))
1472         {
1473             release = surface->resource.heapMemory;
1474             surface->resource.heapMemory = NULL;
1475         }
1476         surface->resource.allocatedMemory = mem;
1477         surface->flags |= SFLAG_USERPTR;
1478
1479         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
1480         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1481
1482         /* For client textures OpenGL has to be notified. */
1483         if (surface->flags & SFLAG_CLIENT)
1484             surface_release_client_storage(surface);
1485
1486         /* Now free the old memory if any. */
1487         HeapFree(GetProcessHeap(), 0, release);
1488     }
1489     else if (surface->flags & SFLAG_USERPTR)
1490     {
1491         /* Map and GetDC will re-create the dib section and allocated memory. */
1492         surface->resource.allocatedMemory = NULL;
1493         /* HeapMemory should be NULL already. */
1494         if (surface->resource.heapMemory)
1495             ERR("User pointer surface has heap memory allocated.\n");
1496         surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
1497
1498         if (surface->flags & SFLAG_CLIENT)
1499             surface_release_client_storage(surface);
1500
1501         surface_prepare_system_memory(surface);
1502         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1503     }
1504
1505     return WINED3D_OK;
1506 }
1507
1508 /* Context activation is done by the caller. */
1509 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1510 {
1511     if (!surface->resource.heapMemory)
1512     {
1513         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1514         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1515                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1516     }
1517
1518     ENTER_GL();
1519     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1520     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1521     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1522             surface->resource.size, surface->resource.allocatedMemory));
1523     checkGLcall("glGetBufferSubDataARB");
1524     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1525     checkGLcall("glDeleteBuffersARB");
1526     LEAVE_GL();
1527
1528     surface->pbo = 0;
1529     surface->flags &= ~SFLAG_PBO;
1530 }
1531
1532 /* Do not call while under the GL lock. */
1533 static void surface_unload(struct wined3d_resource *resource)
1534 {
1535     struct wined3d_surface *surface = surface_from_resource(resource);
1536     struct wined3d_renderbuffer_entry *entry, *entry2;
1537     struct wined3d_device *device = resource->device;
1538     const struct wined3d_gl_info *gl_info;
1539     struct wined3d_context *context;
1540
1541     TRACE("surface %p.\n", surface);
1542
1543     if (resource->pool == WINED3DPOOL_DEFAULT)
1544     {
1545         /* Default pool resources are supposed to be destroyed before Reset is called.
1546          * Implicit resources stay however. So this means we have an implicit render target
1547          * or depth stencil. The content may be destroyed, but we still have to tear down
1548          * opengl resources, so we cannot leave early.
1549          *
1550          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1551          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1552          * or the depth stencil into an FBO the texture or render buffer will be removed
1553          * and all flags get lost
1554          */
1555         surface_init_sysmem(surface);
1556     }
1557     else
1558     {
1559         /* Load the surface into system memory */
1560         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1561         surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
1562     }
1563     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1564     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1565     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1566
1567     context = context_acquire(device, NULL);
1568     gl_info = context->gl_info;
1569
1570     /* Destroy PBOs, but load them into real sysmem before */
1571     if (surface->flags & SFLAG_PBO)
1572         surface_remove_pbo(surface, gl_info);
1573
1574     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1575      * all application-created targets the application has to release the surface
1576      * before calling _Reset
1577      */
1578     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1579     {
1580         ENTER_GL();
1581         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1582         LEAVE_GL();
1583         list_remove(&entry->entry);
1584         HeapFree(GetProcessHeap(), 0, entry);
1585     }
1586     list_init(&surface->renderbuffers);
1587     surface->current_renderbuffer = NULL;
1588
1589     /* If we're in a texture, the texture name belongs to the texture.
1590      * Otherwise, destroy it. */
1591     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1592     {
1593         ENTER_GL();
1594         glDeleteTextures(1, &surface->texture_name);
1595         surface->texture_name = 0;
1596         glDeleteTextures(1, &surface->texture_name_srgb);
1597         surface->texture_name_srgb = 0;
1598         LEAVE_GL();
1599     }
1600
1601     context_release(context);
1602
1603     resource_unload(resource);
1604 }
1605
1606 static const struct wined3d_resource_ops surface_resource_ops =
1607 {
1608     surface_unload,
1609 };
1610
1611 static const struct wined3d_surface_ops surface_ops =
1612 {
1613     surface_private_setup,
1614     surface_cleanup,
1615     surface_realize_palette,
1616     surface_draw_overlay,
1617     surface_preload,
1618     surface_map,
1619     surface_unmap,
1620     surface_getdc,
1621     surface_flip,
1622     surface_blt,
1623     surface_bltfast,
1624     surface_set_mem,
1625 };
1626
1627 /*****************************************************************************
1628  * Initializes the GDI surface, aka creates the DIB section we render to
1629  * The DIB section creation is done by calling GetDC, which will create the
1630  * section and releasing the dc to allow the app to use it. The dib section
1631  * will stay until the surface is released
1632  *
1633  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1634  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1635  * avoid confusion in the shared surface code.
1636  *
1637  * Returns:
1638  *  WINED3D_OK on success
1639  *  The return values of called methods on failure
1640  *
1641  *****************************************************************************/
1642 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1643 {
1644     HRESULT hr;
1645
1646     TRACE("surface %p.\n", surface);
1647
1648     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1649     {
1650         ERR("Overlays not yet supported by GDI surfaces.\n");
1651         return WINED3DERR_INVALIDCALL;
1652     }
1653
1654     /* Sysmem textures have memory already allocated - release it,
1655      * this avoids an unnecessary memcpy. */
1656     hr = surface_create_dib_section(surface);
1657     if (SUCCEEDED(hr))
1658     {
1659         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1660         surface->resource.heapMemory = NULL;
1661         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1662     }
1663
1664     /* We don't mind the nonpow2 stuff in GDI. */
1665     surface->pow2Width = surface->resource.width;
1666     surface->pow2Height = surface->resource.height;
1667
1668     return WINED3D_OK;
1669 }
1670
1671 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1672 {
1673     TRACE("surface %p.\n", surface);
1674
1675     if (surface->flags & SFLAG_DIBSECTION)
1676     {
1677         /* Release the DC. */
1678         SelectObject(surface->hDC, surface->dib.holdbitmap);
1679         DeleteDC(surface->hDC);
1680         /* Release the DIB section. */
1681         DeleteObject(surface->dib.DIBsection);
1682         surface->dib.bitmap_data = NULL;
1683         surface->resource.allocatedMemory = NULL;
1684     }
1685
1686     if (surface->flags & SFLAG_USERPTR)
1687         wined3d_surface_set_mem(surface, NULL);
1688     if (surface->overlay_dest)
1689         list_remove(&surface->overlay_entry);
1690
1691     HeapFree(GetProcessHeap(), 0, surface->palette9);
1692
1693     resource_cleanup(&surface->resource);
1694 }
1695
1696 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1697 {
1698     struct wined3d_palette *palette = surface->palette;
1699
1700     TRACE("surface %p.\n", surface);
1701
1702     if (!palette) return;
1703
1704     if (surface->flags & SFLAG_DIBSECTION)
1705     {
1706         RGBQUAD col[256];
1707         unsigned int i;
1708
1709         TRACE("Updating the DC's palette.\n");
1710
1711         for (i = 0; i < 256; ++i)
1712         {
1713             col[i].rgbRed = palette->palents[i].peRed;
1714             col[i].rgbGreen = palette->palents[i].peGreen;
1715             col[i].rgbBlue = palette->palents[i].peBlue;
1716             col[i].rgbReserved = 0;
1717         }
1718         SetDIBColorTable(surface->hDC, 0, 256, col);
1719     }
1720
1721     /* Update the image because of the palette change. Some games like e.g.
1722      * Red Alert call SetEntries a lot to implement fading. */
1723     /* Tell the swapchain to update the screen. */
1724     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1725     {
1726         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1727         if (surface == swapchain->front_buffer)
1728         {
1729             x11_copy_to_screen(swapchain, NULL);
1730         }
1731     }
1732 }
1733
1734 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
1735 {
1736     FIXME("GDI surfaces can't draw overlays yet.\n");
1737     return E_FAIL;
1738 }
1739
/* Preloading is not supported for GDI surfaces; the call is only logged. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);

    /* Nothing is loaded anywhere, so reaching this is reported as an error. */
    ERR("Preloading GDI surfaces is not supported.\n");
}
1746
1747 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1748 {
1749     TRACE("surface %p, rect %s, flags %#x.\n",
1750             surface, wine_dbgstr_rect(rect), flags);
1751
1752     if (!surface->resource.allocatedMemory)
1753     {
1754         /* This happens on gdi surfaces if the application set a user pointer
1755          * and resets it. Recreate the DIB section. */
1756         surface_create_dib_section(surface);
1757         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1758     }
1759 }
1760
1761 static void gdi_surface_unmap(struct wined3d_surface *surface)
1762 {
1763     TRACE("surface %p.\n", surface);
1764
1765     /* Tell the swapchain to update the screen. */
1766     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1767     {
1768         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1769         if (surface == swapchain->front_buffer)
1770         {
1771             x11_copy_to_screen(swapchain, &surface->lockedRect);
1772         }
1773     }
1774
1775     memset(&surface->lockedRect, 0, sizeof(RECT));
1776 }
1777
1778 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
1779 {
1780     WINED3DLOCKED_RECT lock;
1781     HRESULT hr;
1782
1783     TRACE("surface %p.\n", surface);
1784
1785     /* Should have a DIB section already. */
1786     if (!(surface->flags & SFLAG_DIBSECTION))
1787     {
1788         WARN("DC not supported on this surface\n");
1789         return WINED3DERR_INVALIDCALL;
1790     }
1791
1792     /* Map the surface. */
1793     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1794     if (FAILED(hr))
1795         ERR("Map failed, hr %#x.\n", hr);
1796
1797     return hr;
1798 }
1799
1800 static HRESULT gdi_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1801 {
1802     TRACE("surface %p, override %p.\n", surface, override);
1803
1804     return WINED3D_OK;
1805 }
1806
1807 static HRESULT gdi_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
1808         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
1809         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1810 {
1811     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1812             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
1813             flags, fx, debug_d3dtexturefiltertype(filter));
1814
1815     return surface_cpu_blt(dst_surface, dst_rect, src_surface, src_rect, flags, fx, filter);
1816 }
1817
1818 static HRESULT gdi_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1819         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
1820 {
1821     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
1822             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
1823
1824     return surface_cpu_bltfast(dst_surface, dst_x, dst_y, src_surface, src_rect, trans);
1825 }
1826
1827 static HRESULT gdi_surface_set_mem(struct wined3d_surface *surface, void *mem)
1828 {
1829     TRACE("surface %p, mem %p.\n", surface, mem);
1830
1831     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
1832     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1833     {
1834         ERR("Not supported on render targets.\n");
1835         return WINED3DERR_INVALIDCALL;
1836     }
1837
1838     if (mem && mem != surface->resource.allocatedMemory)
1839     {
1840         void *release = NULL;
1841
1842         /* Do I have to copy the old surface content? */
1843         if (surface->flags & SFLAG_DIBSECTION)
1844         {
1845             SelectObject(surface->hDC, surface->dib.holdbitmap);
1846             DeleteDC(surface->hDC);
1847             /* Release the DIB section. */
1848             DeleteObject(surface->dib.DIBsection);
1849             surface->dib.bitmap_data = NULL;
1850             surface->resource.allocatedMemory = NULL;
1851             surface->hDC = NULL;
1852             surface->flags &= ~SFLAG_DIBSECTION;
1853         }
1854         else if (!(surface->flags & SFLAG_USERPTR))
1855         {
1856             release = surface->resource.allocatedMemory;
1857         }
1858         surface->resource.allocatedMemory = mem;
1859         surface->flags |= SFLAG_USERPTR | SFLAG_INSYSMEM;
1860
1861         /* Now free the old memory, if any. */
1862         HeapFree(GetProcessHeap(), 0, release);
1863     }
1864     else if (surface->flags & SFLAG_USERPTR)
1865     {
1866         /* Map() and GetDC() will re-create the dib section and allocated memory. */
1867         surface->resource.allocatedMemory = NULL;
1868         surface->flags &= ~SFLAG_USERPTR;
1869     }
1870
1871     return WINED3D_OK;
1872 }
1873
1874 static const struct wined3d_surface_ops gdi_surface_ops =
1875 {
1876     gdi_surface_private_setup,
1877     surface_gdi_cleanup,
1878     gdi_surface_realize_palette,
1879     gdi_surface_draw_overlay,
1880     gdi_surface_preload,
1881     gdi_surface_map,
1882     gdi_surface_unmap,
1883     gdi_surface_getdc,
1884     gdi_surface_flip,
1885     gdi_surface_blt,
1886     gdi_surface_bltfast,
1887     gdi_surface_set_mem,
1888 };
1889
1890 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
1891 {
1892     GLuint *name;
1893     DWORD flag;
1894
1895     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
1896
1897     if(srgb)
1898     {
1899         name = &surface->texture_name_srgb;
1900         flag = SFLAG_INSRGBTEX;
1901     }
1902     else
1903     {
1904         name = &surface->texture_name;
1905         flag = SFLAG_INTEXTURE;
1906     }
1907
1908     if (!*name && new_name)
1909     {
1910         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
1911          * surface has no texture name yet. See if we can get rid of this. */
1912         if (surface->flags & flag)
1913             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
1914         surface_modify_location(surface, flag, FALSE);
1915     }
1916
1917     *name = new_name;
1918     surface_force_reload(surface);
1919 }
1920
1921 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
1922 {
1923     TRACE("surface %p, target %#x.\n", surface, target);
1924
1925     if (surface->texture_target != target)
1926     {
1927         if (target == GL_TEXTURE_RECTANGLE_ARB)
1928         {
1929             surface->flags &= ~SFLAG_NORMCOORD;
1930         }
1931         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
1932         {
1933             surface->flags |= SFLAG_NORMCOORD;
1934         }
1935     }
1936     surface->texture_target = target;
1937     surface_force_reload(surface);
1938 }
1939
1940 /* Context activation is done by the caller. */
1941 void surface_bind(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
1942 {
1943     TRACE("surface %p, gl_info %p, srgb %#x.\n", surface, gl_info, srgb);
1944
1945     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
1946     {
1947         struct wined3d_texture *texture = surface->container.u.texture;
1948
1949         TRACE("Passing to container (%p).\n", texture);
1950         texture->texture_ops->texture_bind(texture, gl_info, srgb);
1951     }
1952     else
1953     {
1954         if (surface->texture_level)
1955         {
1956             ERR("Standalone surface %p is non-zero texture level %u.\n",
1957                     surface, surface->texture_level);
1958         }
1959
1960         if (srgb)
1961             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
1962
1963         ENTER_GL();
1964
1965         if (!surface->texture_name)
1966         {
1967             glGenTextures(1, &surface->texture_name);
1968             checkGLcall("glGenTextures");
1969
1970             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
1971
1972             glBindTexture(surface->texture_target, surface->texture_name);
1973             checkGLcall("glBindTexture");
1974             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
1975             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
1976             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
1977             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1978             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1979             checkGLcall("glTexParameteri");
1980         }
1981         else
1982         {
1983             glBindTexture(surface->texture_target, surface->texture_name);
1984             checkGLcall("glBindTexture");
1985         }
1986
1987         LEAVE_GL();
1988     }
1989 }
1990
1991 /* This function checks if the primary render target uses the 8bit paletted format. */
1992 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1993 {
1994     if (device->fb.render_targets && device->fb.render_targets[0])
1995     {
1996         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1997         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1998                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1999             return TRUE;
2000     }
2001     return FALSE;
2002 }
2003
2004 /* This call just downloads data, the caller is responsible for binding the
2005  * correct texture. */
2006 /* Context activation is done by the caller. */
2007 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2008 {
2009     const struct wined3d_format *format = surface->resource.format;
2010
2011     /* Only support read back of converted P8 surfaces. */
2012     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2013     {
2014         FIXME("Readback conversion not supported for format %s.\n", debug_d3dformat(format->id));
2015         return;
2016     }
2017
2018     ENTER_GL();
2019
2020     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2021     {
2022         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2023                 surface, surface->texture_level, format->glFormat, format->glType,
2024                 surface->resource.allocatedMemory);
2025
2026         if (surface->flags & SFLAG_PBO)
2027         {
2028             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2029             checkGLcall("glBindBufferARB");
2030             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2031             checkGLcall("glGetCompressedTexImageARB");
2032             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2033             checkGLcall("glBindBufferARB");
2034         }
2035         else
2036         {
2037             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2038                     surface->texture_level, surface->resource.allocatedMemory));
2039             checkGLcall("glGetCompressedTexImageARB");
2040         }
2041
2042         LEAVE_GL();
2043     }
2044     else
2045     {
2046         void *mem;
2047         GLenum gl_format = format->glFormat;
2048         GLenum gl_type = format->glType;
2049         int src_pitch = 0;
2050         int dst_pitch = 0;
2051
2052         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2053         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2054         {
2055             gl_format = GL_ALPHA;
2056             gl_type = GL_UNSIGNED_BYTE;
2057         }
2058
2059         if (surface->flags & SFLAG_NONPOW2)
2060         {
2061             unsigned char alignment = surface->resource.device->surface_alignment;
2062             src_pitch = format->byte_count * surface->pow2Width;
2063             dst_pitch = wined3d_surface_get_pitch(surface);
2064             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2065             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2066         }
2067         else
2068         {
2069             mem = surface->resource.allocatedMemory;
2070         }
2071
2072         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2073                 surface, surface->texture_level, gl_format, gl_type, mem);
2074
2075         if (surface->flags & SFLAG_PBO)
2076         {
2077             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2078             checkGLcall("glBindBufferARB");
2079
2080             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2081             checkGLcall("glGetTexImage");
2082
2083             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2084             checkGLcall("glBindBufferARB");
2085         }
2086         else
2087         {
2088             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2089             checkGLcall("glGetTexImage");
2090         }
2091         LEAVE_GL();
2092
2093         if (surface->flags & SFLAG_NONPOW2)
2094         {
2095             const BYTE *src_data;
2096             BYTE *dst_data;
2097             UINT y;
2098             /*
2099              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2100              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2101              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2102              *
2103              * We're doing this...
2104              *
2105              * instead of boxing the texture :
2106              * |<-texture width ->|  -->pow2width|   /\
2107              * |111111111111111111|              |   |
2108              * |222 Texture 222222| boxed empty  | texture height
2109              * |3333 Data 33333333|              |   |
2110              * |444444444444444444|              |   \/
2111              * -----------------------------------   |
2112              * |     boxed  empty | boxed empty  | pow2height
2113              * |                  |              |   \/
2114              * -----------------------------------
2115              *
2116              *
2117              * we're repacking the data to the expected texture width
2118              *
2119              * |<-texture width ->|  -->pow2width|   /\
2120              * |111111111111111111222222222222222|   |
2121              * |222333333333333333333444444444444| texture height
2122              * |444444                           |   |
2123              * |                                 |   \/
2124              * |                                 |   |
2125              * |            empty                | pow2height
2126              * |                                 |   \/
2127              * -----------------------------------
2128              *
2129              * == is the same as
2130              *
2131              * |<-texture width ->|    /\
2132              * |111111111111111111|
2133              * |222222222222222222|texture height
2134              * |333333333333333333|
2135              * |444444444444444444|    \/
2136              * --------------------
2137              *
2138              * this also means that any references to allocatedMemory should work with the data as if were a
2139              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2140              *
2141              * internally the texture is still stored in a boxed format so any references to textureName will
2142              * get a boxed texture with width pow2width and not a texture of width resource.width.
2143              *
2144              * Performance should not be an issue, because applications normally do not lock the surfaces when
2145              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2146              * and doesn't have to be re-read. */
2147             src_data = mem;
2148             dst_data = surface->resource.allocatedMemory;
2149             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2150             for (y = 1; y < surface->resource.height; ++y)
2151             {
2152                 /* skip the first row */
2153                 src_data += src_pitch;
2154                 dst_data += dst_pitch;
2155                 memcpy(dst_data, src_data, dst_pitch);
2156             }
2157
2158             HeapFree(GetProcessHeap(), 0, mem);
2159         }
2160     }
2161
2162     /* Surface has now been downloaded */
2163     surface->flags |= SFLAG_INSYSMEM;
2164 }
2165
2166 /* This call just uploads data, the caller is responsible for binding the
2167  * correct texture. */
2168 /* Context activation is done by the caller. */
2169 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2170         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2171         BOOL srgb, const struct wined3d_bo_address *data)
2172 {
2173     UINT update_w = src_rect->right - src_rect->left;
2174     UINT update_h = src_rect->bottom - src_rect->top;
2175
2176     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2177             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2178             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2179
2180     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2181         update_h *= format->heightscale;
2182
2183     ENTER_GL();
2184
2185     if (data->buffer_object)
2186     {
2187         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2188         checkGLcall("glBindBufferARB");
2189     }
2190
2191     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2192     {
2193         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2194         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2195         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2196         const BYTE *addr = data->addr;
2197         GLenum internal;
2198
2199         addr += (src_rect->top / format->block_height) * src_pitch;
2200         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2201
2202         if (srgb)
2203             internal = format->glGammaInternal;
2204         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2205             internal = format->rtInternal;
2206         else
2207             internal = format->glInternal;
2208
2209         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2210                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2211                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2212
2213         if (row_length == src_pitch)
2214         {
2215             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2216                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2217         }
2218         else
2219         {
2220             UINT row, y;
2221
2222             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2223              * can't use the unpack row length like below. */
2224             for (row = 0, y = dst_point->y; row < row_count; ++row)
2225             {
2226                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2227                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2228                 y += format->block_height;
2229                 addr += src_pitch;
2230             }
2231         }
2232         checkGLcall("glCompressedTexSubImage2DARB");
2233     }
2234     else
2235     {
2236         const BYTE *addr = data->addr;
2237
2238         addr += src_rect->top * src_w * format->byte_count;
2239         addr += src_rect->left * format->byte_count;
2240
2241         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2242                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2243                 update_w, update_h, format->glFormat, format->glType, addr);
2244
2245         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2246         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2247                 update_w, update_h, format->glFormat, format->glType, addr);
2248         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2249         checkGLcall("glTexSubImage2D");
2250     }
2251
2252     if (data->buffer_object)
2253     {
2254         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2255         checkGLcall("glBindBufferARB");
2256     }
2257
2258     LEAVE_GL();
2259
2260     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2261     {
2262         struct wined3d_device *device = surface->resource.device;
2263         unsigned int i;
2264
2265         for (i = 0; i < device->context_count; ++i)
2266         {
2267             context_surface_update(device->contexts[i], surface);
2268         }
2269     }
2270 }
2271
2272 /* This call just allocates the texture, the caller is responsible for binding
2273  * the correct texture. */
2274 /* Context activation is done by the caller. */
2275 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2276         const struct wined3d_format *format, BOOL srgb)
2277 {
2278     BOOL enable_client_storage = FALSE;
2279     GLsizei width = surface->pow2Width;
2280     GLsizei height = surface->pow2Height;
2281     const BYTE *mem = NULL;
2282     GLenum internal;
2283
2284     if (srgb)
2285     {
2286         internal = format->glGammaInternal;
2287     }
2288     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2289     {
2290         internal = format->rtInternal;
2291     }
2292     else
2293     {
2294         internal = format->glInternal;
2295     }
2296
2297     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2298
2299     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2300             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2301             internal, width, height, format->glFormat, format->glType);
2302
2303     ENTER_GL();
2304
2305     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2306     {
2307         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2308                 || !surface->resource.allocatedMemory)
2309         {
2310             /* In some cases we want to disable client storage.
2311              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2312              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2313              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2314              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2315              */
2316             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2317             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2318             surface->flags &= ~SFLAG_CLIENT;
2319             enable_client_storage = TRUE;
2320         }
2321         else
2322         {
2323             surface->flags |= SFLAG_CLIENT;
2324
2325             /* Point OpenGL to our allocated texture memory. Do not use
2326              * resource.allocatedMemory here because it might point into a
2327              * PBO. Instead use heapMemory, but get the alignment right. */
2328             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2329                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2330         }
2331     }
2332
2333     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2334     {
2335         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2336                 internal, width, height, 0, surface->resource.size, mem));
2337         checkGLcall("glCompressedTexImage2DARB");
2338     }
2339     else
2340     {
2341         glTexImage2D(surface->texture_target, surface->texture_level,
2342                 internal, width, height, 0, format->glFormat, format->glType, mem);
2343         checkGLcall("glTexImage2D");
2344     }
2345
2346     if(enable_client_storage) {
2347         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2348         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2349     }
2350     LEAVE_GL();
2351 }
2352
2353 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2354  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2355 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2356 /* GL locking is done by the caller */
2357 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2358 {
2359     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2360     struct wined3d_renderbuffer_entry *entry;
2361     GLuint renderbuffer = 0;
2362     unsigned int src_width, src_height;
2363     unsigned int width, height;
2364
2365     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2366     {
2367         width = rt->pow2Width;
2368         height = rt->pow2Height;
2369     }
2370     else
2371     {
2372         width = surface->pow2Width;
2373         height = surface->pow2Height;
2374     }
2375
2376     src_width = surface->pow2Width;
2377     src_height = surface->pow2Height;
2378
2379     /* A depth stencil smaller than the render target is not valid */
2380     if (width > src_width || height > src_height) return;
2381
2382     /* Remove any renderbuffer set if the sizes match */
2383     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2384             || (width == src_width && height == src_height))
2385     {
2386         surface->current_renderbuffer = NULL;
2387         return;
2388     }
2389
2390     /* Look if we've already got a renderbuffer of the correct dimensions */
2391     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2392     {
2393         if (entry->width == width && entry->height == height)
2394         {
2395             renderbuffer = entry->id;
2396             surface->current_renderbuffer = entry;
2397             break;
2398         }
2399     }
2400
2401     if (!renderbuffer)
2402     {
2403         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2404         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2405         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2406                 surface->resource.format->glInternal, width, height);
2407
2408         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2409         entry->width = width;
2410         entry->height = height;
2411         entry->id = renderbuffer;
2412         list_add_head(&surface->renderbuffers, &entry->entry);
2413
2414         surface->current_renderbuffer = entry;
2415     }
2416
2417     checkGLcall("set_compatible_renderbuffer");
2418 }
2419
2420 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2421 {
2422     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2423
2424     TRACE("surface %p.\n", surface);
2425
2426     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2427     {
2428         ERR("Surface %p is not on a swapchain.\n", surface);
2429         return GL_NONE;
2430     }
2431
2432     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2433     {
2434         if (swapchain->render_to_fbo)
2435         {
2436             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2437             return GL_COLOR_ATTACHMENT0;
2438         }
2439         TRACE("Returning GL_BACK\n");
2440         return GL_BACK;
2441     }
2442     else if (surface == swapchain->front_buffer)
2443     {
2444         TRACE("Returning GL_FRONT\n");
2445         return GL_FRONT;
2446     }
2447
2448     FIXME("Higher back buffer, returning GL_BACK\n");
2449     return GL_BACK;
2450 }
2451
2452 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2453 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2454 {
2455     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2456
2457     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2458         /* No partial locking for textures yet. */
2459         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2460
2461     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2462     if (dirty_rect)
2463     {
2464         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2465         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2466         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2467         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2468     }
2469     else
2470     {
2471         surface->dirtyRect.left = 0;
2472         surface->dirtyRect.top = 0;
2473         surface->dirtyRect.right = surface->resource.width;
2474         surface->dirtyRect.bottom = surface->resource.height;
2475     }
2476
2477     /* if the container is a texture then mark it dirty. */
2478     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2479     {
2480         TRACE("Passing to container.\n");
2481         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2482     }
2483 }
2484
2485 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
2486         DWORD color, WINED3DCOLORVALUE *float_color)
2487 {
2488     const struct wined3d_format *format = surface->resource.format;
2489     const struct wined3d_device *device = surface->resource.device;
2490
2491     switch (format->id)
2492     {
2493         case WINED3DFMT_P8_UINT:
2494             if (surface->palette)
2495             {
2496                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
2497                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
2498                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
2499             }
2500             else
2501             {
2502                 float_color->r = 0.0f;
2503                 float_color->g = 0.0f;
2504                 float_color->b = 0.0f;
2505             }
2506             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
2507             break;
2508
2509         case WINED3DFMT_B5G6R5_UNORM:
2510             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
2511             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
2512             float_color->b = (color & 0x1f) / 31.0f;
2513             float_color->a = 1.0f;
2514             break;
2515
2516         case WINED3DFMT_B8G8R8_UNORM:
2517         case WINED3DFMT_B8G8R8X8_UNORM:
2518             float_color->r = D3DCOLOR_R(color);
2519             float_color->g = D3DCOLOR_G(color);
2520             float_color->b = D3DCOLOR_B(color);
2521             float_color->a = 1.0f;
2522             break;
2523
2524         case WINED3DFMT_B8G8R8A8_UNORM:
2525             float_color->r = D3DCOLOR_R(color);
2526             float_color->g = D3DCOLOR_G(color);
2527             float_color->b = D3DCOLOR_B(color);
2528             float_color->a = D3DCOLOR_A(color);
2529             break;
2530
2531         default:
2532             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
2533             return FALSE;
2534     }
2535
2536     return TRUE;
2537 }
2538
2539 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2540 {
2541     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2542
2543     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2544
2545     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2546     {
2547         ERR("Not supported on scratch surfaces.\n");
2548         return WINED3DERR_INVALIDCALL;
2549     }
2550
2551     if (!(surface->flags & flag))
2552     {
2553         TRACE("Reloading because surface is dirty\n");
2554     }
2555     /* Reload if either the texture and sysmem have different ideas about the
2556      * color key, or the actual key values changed. */
2557     else if (!(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2558             || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2559             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2560             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2561     {
2562         TRACE("Reloading because of color keying\n");
2563         /* To perform the color key conversion we need a sysmem copy of
2564          * the surface. Make sure we have it. */
2565
2566         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2567         /* Make sure the texture is reloaded because of the color key change,
2568          * this kills performance though :( */
2569         /* TODO: This is not necessarily needed with hw palettized texture support. */
2570         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2571     }
2572     else
2573     {
2574         TRACE("surface is already in texture\n");
2575         return WINED3D_OK;
2576     }
2577
2578     /* No partial locking for textures yet. */
2579     surface_load_location(surface, flag, NULL);
2580     surface_evict_sysmem(surface);
2581
2582     return WINED3D_OK;
2583 }
2584
2585 /* See also float_16_to_32() in wined3d_private.h */
/* Convert a 32-bit float into a 16-bit half float bit pattern
 * (1 sign bit, 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * in: pointer to the value to convert.
 *
 * Returns 0x0000 for zero, 0x7c01 for NaN, +/-INF (0x7c00 / 0xfc00) for
 * infinities and for magnitudes too large to represent, a denormal (or zero)
 * for magnitudes too small for a normalized half, and the rounded
 * (nearest, ties away from zero) encoding otherwise. */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the magnitude into [2^10, 2^11) so that its integer part is the
     * 11 bit mantissa (implicit leading one included), tracking the power
     * of two that was applied. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding may carry out of the 11 bit mantissa (2047.5 -> 2048).
     * Renormalize, otherwise masking with 0x3ff below would silently drop
     * the carry and halve the encoded value. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2647
2648 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2649 {
2650     ULONG refcount;
2651
2652     TRACE("Surface %p, container %p of type %#x.\n",
2653             surface, surface->container.u.base, surface->container.type);
2654
2655     switch (surface->container.type)
2656     {
2657         case WINED3D_CONTAINER_TEXTURE:
2658             return wined3d_texture_incref(surface->container.u.texture);
2659
2660         case WINED3D_CONTAINER_SWAPCHAIN:
2661             return wined3d_swapchain_incref(surface->container.u.swapchain);
2662
2663         default:
2664             ERR("Unhandled container type %#x.\n", surface->container.type);
2665         case WINED3D_CONTAINER_NONE:
2666             break;
2667     }
2668
2669     refcount = InterlockedIncrement(&surface->resource.ref);
2670     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2671
2672     return refcount;
2673 }
2674
2675 /* Do not call while under the GL lock. */
2676 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2677 {
2678     ULONG refcount;
2679
2680     TRACE("Surface %p, container %p of type %#x.\n",
2681             surface, surface->container.u.base, surface->container.type);
2682
2683     switch (surface->container.type)
2684     {
2685         case WINED3D_CONTAINER_TEXTURE:
2686             return wined3d_texture_decref(surface->container.u.texture);
2687
2688         case WINED3D_CONTAINER_SWAPCHAIN:
2689             return wined3d_swapchain_decref(surface->container.u.swapchain);
2690
2691         default:
2692             ERR("Unhandled container type %#x.\n", surface->container.type);
2693         case WINED3D_CONTAINER_NONE:
2694             break;
2695     }
2696
2697     refcount = InterlockedDecrement(&surface->resource.ref);
2698     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2699
2700     if (!refcount)
2701     {
2702         surface->surface_ops->surface_cleanup(surface);
2703         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2704
2705         TRACE("Destroyed surface %p.\n", surface);
2706         HeapFree(GetProcessHeap(), 0, surface);
2707     }
2708
2709     return refcount;
2710 }
2711
2712 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2713 {
2714     return resource_set_priority(&surface->resource, priority);
2715 }
2716
2717 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2718 {
2719     return resource_get_priority(&surface->resource);
2720 }
2721
2722 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2723 {
2724     TRACE("surface %p.\n", surface);
2725
2726     surface->surface_ops->surface_preload(surface);
2727 }
2728
2729 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2730 {
2731     TRACE("surface %p.\n", surface);
2732
2733     return surface->resource.parent;
2734 }
2735
2736 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2737 {
2738     TRACE("surface %p.\n", surface);
2739
2740     return &surface->resource;
2741 }
2742
2743 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2744 {
2745     TRACE("surface %p, flags %#x.\n", surface, flags);
2746
2747     switch (flags)
2748     {
2749         case WINEDDGBS_CANBLT:
2750         case WINEDDGBS_ISBLTDONE:
2751             return WINED3D_OK;
2752
2753         default:
2754             return WINED3DERR_INVALIDCALL;
2755     }
2756 }
2757
2758 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2759 {
2760     TRACE("surface %p, flags %#x.\n", surface, flags);
2761
2762     /* XXX: DDERR_INVALIDSURFACETYPE */
2763
2764     switch (flags)
2765     {
2766         case WINEDDGFS_CANFLIP:
2767         case WINEDDGFS_ISFLIPDONE:
2768             return WINED3D_OK;
2769
2770         default:
2771             return WINED3DERR_INVALIDCALL;
2772     }
2773 }
2774
2775 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2776 {
2777     TRACE("surface %p.\n", surface);
2778
2779     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2780     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2781 }
2782
2783 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2784 {
2785     TRACE("surface %p.\n", surface);
2786
2787     /* So far we don't lose anything :) */
2788     surface->flags &= ~SFLAG_LOST;
2789     return WINED3D_OK;
2790 }
2791
2792 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2793 {
2794     TRACE("surface %p, palette %p.\n", surface, palette);
2795
2796     if (surface->palette == palette)
2797     {
2798         TRACE("Nop palette change.\n");
2799         return WINED3D_OK;
2800     }
2801
2802     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2803         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2804
2805     surface->palette = palette;
2806
2807     if (palette)
2808     {
2809         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2810             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2811
2812         surface->surface_ops->surface_realize_palette(surface);
2813     }
2814
2815     return WINED3D_OK;
2816 }
2817
2818 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2819         DWORD flags, const WINEDDCOLORKEY *color_key)
2820 {
2821     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
2822
2823     if (flags & WINEDDCKEY_COLORSPACE)
2824     {
2825         FIXME(" colorkey value not supported (%08x) !\n", flags);
2826         return WINED3DERR_INVALIDCALL;
2827     }
2828
2829     /* Dirtify the surface, but only if a key was changed. */
2830     if (color_key)
2831     {
2832         switch (flags & ~WINEDDCKEY_COLORSPACE)
2833         {
2834             case WINEDDCKEY_DESTBLT:
2835                 surface->DestBltCKey = *color_key;
2836                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
2837                 break;
2838
2839             case WINEDDCKEY_DESTOVERLAY:
2840                 surface->DestOverlayCKey = *color_key;
2841                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
2842                 break;
2843
2844             case WINEDDCKEY_SRCOVERLAY:
2845                 surface->SrcOverlayCKey = *color_key;
2846                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
2847                 break;
2848
2849             case WINEDDCKEY_SRCBLT:
2850                 surface->SrcBltCKey = *color_key;
2851                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
2852                 break;
2853         }
2854     }
2855     else
2856     {
2857         switch (flags & ~WINEDDCKEY_COLORSPACE)
2858         {
2859             case WINEDDCKEY_DESTBLT:
2860                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
2861                 break;
2862
2863             case WINEDDCKEY_DESTOVERLAY:
2864                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
2865                 break;
2866
2867             case WINEDDCKEY_SRCOVERLAY:
2868                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
2869                 break;
2870
2871             case WINEDDCKEY_SRCBLT:
2872                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
2873                 break;
2874         }
2875     }
2876
2877     return WINED3D_OK;
2878 }
2879
2880 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
2881 {
2882     TRACE("surface %p.\n", surface);
2883
2884     return surface->palette;
2885 }
2886
2887 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
2888 {
2889     const struct wined3d_format *format = surface->resource.format;
2890     DWORD pitch;
2891
2892     TRACE("surface %p.\n", surface);
2893
2894     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
2895     {
2896         /* Since compressed formats are block based, pitch means the amount of
2897          * bytes to the next row of block rather than the next row of pixels. */
2898         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
2899         pitch = row_block_count * format->block_byte_count;
2900     }
2901     else
2902     {
2903         unsigned char alignment = surface->resource.device->surface_alignment;
2904         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
2905         pitch = (pitch + alignment - 1) & ~(alignment - 1);
2906     }
2907
2908     TRACE("Returning %u.\n", pitch);
2909
2910     return pitch;
2911 }
2912
2913 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
2914 {
2915     TRACE("surface %p, mem %p.\n", surface, mem);
2916
2917     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
2918     {
2919         WARN("Surface is locked or the DC is in use.\n");
2920         return WINED3DERR_INVALIDCALL;
2921     }
2922
2923     return surface->surface_ops->surface_set_mem(surface, mem);
2924 }
2925
2926 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
2927 {
2928     LONG w, h;
2929
2930     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
2931
2932     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2933     {
2934         WARN("Not an overlay surface.\n");
2935         return WINEDDERR_NOTAOVERLAYSURFACE;
2936     }
2937
2938     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
2939     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
2940     surface->overlay_destrect.left = x;
2941     surface->overlay_destrect.top = y;
2942     surface->overlay_destrect.right = x + w;
2943     surface->overlay_destrect.bottom = y + h;
2944
2945     surface->surface_ops->surface_draw_overlay(surface);
2946
2947     return WINED3D_OK;
2948 }
2949
2950 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
2951 {
2952     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
2953
2954     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2955     {
2956         TRACE("Not an overlay surface.\n");
2957         return WINEDDERR_NOTAOVERLAYSURFACE;
2958     }
2959
2960     if (!surface->overlay_dest)
2961     {
2962         TRACE("Overlay not visible.\n");
2963         *x = 0;
2964         *y = 0;
2965         return WINEDDERR_OVERLAYNOTVISIBLE;
2966     }
2967
2968     *x = surface->overlay_destrect.left;
2969     *y = surface->overlay_destrect.top;
2970
2971     TRACE("Returning position %d, %d.\n", *x, *y);
2972
2973     return WINED3D_OK;
2974 }
2975
2976 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
2977         DWORD flags, struct wined3d_surface *ref)
2978 {
2979     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
2980
2981     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2982     {
2983         TRACE("Not an overlay surface.\n");
2984         return WINEDDERR_NOTAOVERLAYSURFACE;
2985     }
2986
2987     return WINED3D_OK;
2988 }
2989
2990 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
2991         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
2992 {
2993     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
2994             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
2995
2996     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2997     {
2998         WARN("Not an overlay surface.\n");
2999         return WINEDDERR_NOTAOVERLAYSURFACE;
3000     }
3001     else if (!dst_surface)
3002     {
3003         WARN("Dest surface is NULL.\n");
3004         return WINED3DERR_INVALIDCALL;
3005     }
3006
3007     if (src_rect)
3008     {
3009         surface->overlay_srcrect = *src_rect;
3010     }
3011     else
3012     {
3013         surface->overlay_srcrect.left = 0;
3014         surface->overlay_srcrect.top = 0;
3015         surface->overlay_srcrect.right = surface->resource.width;
3016         surface->overlay_srcrect.bottom = surface->resource.height;
3017     }
3018
3019     if (dst_rect)
3020     {
3021         surface->overlay_destrect = *dst_rect;
3022     }
3023     else
3024     {
3025         surface->overlay_destrect.left = 0;
3026         surface->overlay_destrect.top = 0;
3027         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3028         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3029     }
3030
3031     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3032     {
3033         list_remove(&surface->overlay_entry);
3034     }
3035
3036     if (flags & WINEDDOVER_SHOW)
3037     {
3038         if (surface->overlay_dest != dst_surface)
3039         {
3040             surface->overlay_dest = dst_surface;
3041             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3042         }
3043     }
3044     else if (flags & WINEDDOVER_HIDE)
3045     {
3046         /* tests show that the rectangles are erased on hide */
3047         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3048         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3049         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3050         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3051         surface->overlay_dest = NULL;
3052     }
3053
3054     surface->surface_ops->surface_draw_overlay(surface);
3055
3056     return WINED3D_OK;
3057 }
3058
3059 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3060 {
3061     TRACE("surface %p, clipper %p.\n", surface, clipper);
3062
3063     surface->clipper = clipper;
3064
3065     return WINED3D_OK;
3066 }
3067
3068 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3069 {
3070     TRACE("surface %p.\n", surface);
3071
3072     return surface->clipper;
3073 }
3074
3075 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3076 {
3077     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3078
3079     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3080
3081     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3082     {
3083         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3084         return WINED3DERR_INVALIDCALL;
3085     }
3086
3087     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3088             surface->pow2Width, surface->pow2Height);
3089     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3090     surface->resource.format = format;
3091
3092     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3093     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3094             format->glFormat, format->glInternal, format->glType);
3095
3096     return WINED3D_OK;
3097 }
3098
3099 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3100         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3101 {
3102     unsigned short *dst_s;
3103     const float *src_f;
3104     unsigned int x, y;
3105
3106     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3107
3108     for (y = 0; y < h; ++y)
3109     {
3110         src_f = (const float *)(src + y * pitch_in);
3111         dst_s = (unsigned short *) (dst + y * pitch_out);
3112         for (x = 0; x < w; ++x)
3113         {
3114             dst_s[x] = float_32_to_16(src_f + x);
3115         }
3116     }
3117 }
3118
3119 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3120         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3121 {
3122     static const unsigned char convert_5to8[] =
3123     {
3124         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3125         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3126         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3127         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3128     };
3129     static const unsigned char convert_6to8[] =
3130     {
3131         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3132         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3133         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3134         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3135         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3136         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3137         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3138         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3139     };
3140     unsigned int x, y;
3141
3142     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3143
3144     for (y = 0; y < h; ++y)
3145     {
3146         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3147         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3148         for (x = 0; x < w; ++x)
3149         {
3150             WORD pixel = src_line[x];
3151             dst_line[x] = 0xff000000
3152                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3153                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3154                     | convert_5to8[(pixel & 0x001f)];
3155         }
3156     }
3157 }
3158
3159 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3160         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3161 {
3162     unsigned int x, y;
3163
3164     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3165
3166     for (y = 0; y < h; ++y)
3167     {
3168         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3169         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3170
3171         for (x = 0; x < w; ++x)
3172         {
3173             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3174         }
3175     }
3176 }
3177
3178 static inline BYTE cliptobyte(int x)
3179 {
3180     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3181 }
3182
3183 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3184         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3185 {
3186     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3187     unsigned int x, y;
3188
3189     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3190
3191     for (y = 0; y < h; ++y)
3192     {
3193         const BYTE *src_line = src + y * pitch_in;
3194         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3195         for (x = 0; x < w; ++x)
3196         {
3197             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3198              *     C = Y - 16; D = U - 128; E = V - 128;
3199              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3200              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3201              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3202              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3203              * U and V are shared between the pixels. */
3204             if (!(x & 1)) /* For every even pixel, read new U and V. */
3205             {
3206                 d = (int) src_line[1] - 128;
3207                 e = (int) src_line[3] - 128;
3208                 r2 = 409 * e + 128;
3209                 g2 = - 100 * d - 208 * e + 128;
3210                 b2 = 516 * d + 128;
3211             }
3212             c2 = 298 * ((int) src_line[0] - 16);
3213             dst_line[x] = 0xff000000
3214                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3215                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3216                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3217                 /* Scale RGB values to 0..255 range,
3218                  * then clip them if still not in range (may be negative),
3219                  * then shift them within DWORD if necessary. */
3220             src_line += 2;
3221         }
3222     }
3223 }
3224
3225 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3226         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3227 {
3228     unsigned int x, y;
3229     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3230
3231     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3232
3233     for (y = 0; y < h; ++y)
3234     {
3235         const BYTE *src_line = src + y * pitch_in;
3236         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3237         for (x = 0; x < w; ++x)
3238         {
3239             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3240              *     C = Y - 16; D = U - 128; E = V - 128;
3241              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3242              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3243              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3244              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3245              * U and V are shared between the pixels. */
3246             if (!(x & 1)) /* For every even pixel, read new U and V. */
3247             {
3248                 d = (int) src_line[1] - 128;
3249                 e = (int) src_line[3] - 128;
3250                 r2 = 409 * e + 128;
3251                 g2 = - 100 * d - 208 * e + 128;
3252                 b2 = 516 * d + 128;
3253             }
3254             c2 = 298 * ((int) src_line[0] - 16);
3255             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3256                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3257                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3258                 /* Scale RGB values to 0..255 range,
3259                  * then clip them if still not in range (may be negative),
3260                  * then shift them within DWORD if necessary. */
3261             src_line += 2;
3262         }
3263     }
3264 }
3265
3266 struct d3dfmt_convertor_desc
3267 {
3268     enum wined3d_format_id from, to;
3269     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3270 };
3271
/* Conversions that find_convertor() can return. Each row is
 * {source format, destination format, conversion routine}. */
static const struct d3dfmt_convertor_desc convertors[] =
{
    {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
    {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
    {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
};
3280
3281 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3282         enum wined3d_format_id to)
3283 {
3284     unsigned int i;
3285
3286     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3287     {
3288         if (convertors[i].from == from && convertors[i].to == to)
3289             return &convertors[i];
3290     }
3291
3292     return NULL;
3293 }
3294
3295 /*****************************************************************************
3296  * surface_convert_format
3297  *
3298  * Creates a duplicate of a surface in a different format. Is used by Blt to
3299  * blit between surfaces with different formats.
3300  *
3301  * Parameters
3302  *  source: Source surface
3303  *  fmt: Requested destination format
3304  *
3305  *****************************************************************************/
3306 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3307 {
3308     const struct d3dfmt_convertor_desc *conv;
3309     WINED3DLOCKED_RECT lock_src, lock_dst;
3310     struct wined3d_surface *ret = NULL;
3311     HRESULT hr;
3312
3313     conv = find_convertor(source->resource.format->id, to_fmt);
3314     if (!conv)
3315     {
3316         FIXME("Cannot find a conversion function from format %s to %s.\n",
3317                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3318         return NULL;
3319     }
3320
3321     wined3d_surface_create(source->resource.device, source->resource.width,
3322             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3323             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3324             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3325     if (!ret)
3326     {
3327         ERR("Failed to create a destination surface for conversion.\n");
3328         return NULL;
3329     }
3330
3331     memset(&lock_src, 0, sizeof(lock_src));
3332     memset(&lock_dst, 0, sizeof(lock_dst));
3333
3334     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3335     if (FAILED(hr))
3336     {
3337         ERR("Failed to lock the source surface.\n");
3338         wined3d_surface_decref(ret);
3339         return NULL;
3340     }
3341     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3342     if (FAILED(hr))
3343     {
3344         ERR("Failed to lock the destination surface.\n");
3345         wined3d_surface_unmap(source);
3346         wined3d_surface_decref(ret);
3347         return NULL;
3348     }
3349
3350     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3351             source->resource.width, source->resource.height);
3352
3353     wined3d_surface_unmap(ret);
3354     wined3d_surface_unmap(source);
3355
3356     return ret;
3357 }
3358
3359 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3360         unsigned int bpp, UINT pitch, DWORD color)
3361 {
3362     BYTE *first;
3363     int x, y;
3364
3365     /* Do first row */
3366
3367 #define COLORFILL_ROW(type) \
3368 do { \
3369     type *d = (type *)buf; \
3370     for (x = 0; x < width; ++x) \
3371         d[x] = (type)color; \
3372 } while(0)
3373
3374     switch (bpp)
3375     {
3376         case 1:
3377             COLORFILL_ROW(BYTE);
3378             break;
3379
3380         case 2:
3381             COLORFILL_ROW(WORD);
3382             break;
3383
3384         case 3:
3385         {
3386             BYTE *d = buf;
3387             for (x = 0; x < width; ++x, d += 3)
3388             {
3389                 d[0] = (color      ) & 0xFF;
3390                 d[1] = (color >>  8) & 0xFF;
3391                 d[2] = (color >> 16) & 0xFF;
3392             }
3393             break;
3394         }
3395         case 4:
3396             COLORFILL_ROW(DWORD);
3397             break;
3398
3399         default:
3400             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3401             return WINED3DERR_NOTAVAILABLE;
3402     }
3403
3404 #undef COLORFILL_ROW
3405
3406     /* Now copy first row. */
3407     first = buf;
3408     for (y = 1; y < height; ++y)
3409     {
3410         buf += pitch;
3411         memcpy(buf, first, width * bpp);
3412     }
3413
3414     return WINED3D_OK;
3415 }
3416
3417 /* Do not call while under the GL lock. */
3418 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
3419         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
3420         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
3421 {
3422     RECT src_rect, dst_rect;
3423
3424     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
3425             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
3426             flags, fx, debug_d3dtexturefiltertype(filter));
3427
3428     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
3429     {
3430         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
3431         return WINEDDERR_SURFACEBUSY;
3432     }
3433
3434     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
3435     if (src_surface)
3436         surface_get_rect(src_surface, src_rect_in, &src_rect);
3437     else
3438         memset(&src_rect, 0, sizeof(src_rect));
3439
3440     return dst_surface->surface_ops->surface_blt(dst_surface,
3441             &dst_rect, src_surface, &src_rect, flags, fx, filter);
3442 }
3443
3444 /* Do not call while under the GL lock. */
3445 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
3446         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
3447 {
3448     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, trans %#x.\n",
3449             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
3450
3451     return dst_surface->surface_ops->surface_bltfast(dst_surface,
3452             dst_x, dst_y, src_surface, src_rect, trans);
3453 }
3454
3455 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3456 {
3457     TRACE("surface %p.\n", surface);
3458
3459     if (!(surface->flags & SFLAG_LOCKED))
3460     {
3461         WARN("Trying to unmap unmapped surface.\n");
3462         return WINEDDERR_NOTLOCKED;
3463     }
3464     surface->flags &= ~SFLAG_LOCKED;
3465
3466     surface->surface_ops->surface_unmap(surface);
3467
3468     return WINED3D_OK;
3469 }
3470
3471 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3472         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3473 {
3474     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3475             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3476
3477     if (surface->flags & SFLAG_LOCKED)
3478     {
3479         WARN("Surface is already mapped.\n");
3480         return WINED3DERR_INVALIDCALL;
3481     }
3482     surface->flags |= SFLAG_LOCKED;
3483
3484     if (!(surface->flags & SFLAG_LOCKABLE))
3485         WARN("Trying to lock unlockable surface.\n");
3486
3487     surface->surface_ops->surface_map(surface, rect, flags);
3488
3489     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3490
3491     if (!rect)
3492     {
3493         locked_rect->pBits = surface->resource.allocatedMemory;
3494         surface->lockedRect.left = 0;
3495         surface->lockedRect.top = 0;
3496         surface->lockedRect.right = surface->resource.width;
3497         surface->lockedRect.bottom = surface->resource.height;
3498     }
3499     else
3500     {
3501         const struct wined3d_format *format = surface->resource.format;
3502
3503         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3504         {
3505             /* Compressed textures are block based, so calculate the offset of
3506              * the block that contains the top-left pixel of the locked rectangle. */
3507             locked_rect->pBits = surface->resource.allocatedMemory
3508                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3509                     + ((rect->left / format->block_width) * format->block_byte_count);
3510         }
3511         else
3512         {
3513             locked_rect->pBits = surface->resource.allocatedMemory
3514                     + (locked_rect->Pitch * rect->top)
3515                     + (rect->left * format->byte_count);
3516         }
3517         surface->lockedRect.left = rect->left;
3518         surface->lockedRect.top = rect->top;
3519         surface->lockedRect.right = rect->right;
3520         surface->lockedRect.bottom = rect->bottom;
3521     }
3522
3523     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3524     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3525
3526     return WINED3D_OK;
3527 }
3528
3529 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3530 {
3531     HRESULT hr;
3532
3533     TRACE("surface %p, dc %p.\n", surface, dc);
3534
3535     if (surface->flags & SFLAG_USERPTR)
3536     {
3537         ERR("Not supported on surfaces with application-provided memory.\n");
3538         return WINEDDERR_NODC;
3539     }
3540
3541     /* Give more detailed info for ddraw. */
3542     if (surface->flags & SFLAG_DCINUSE)
3543         return WINEDDERR_DCALREADYCREATED;
3544
3545     /* Can't GetDC if the surface is locked. */
3546     if (surface->flags & SFLAG_LOCKED)
3547         return WINED3DERR_INVALIDCALL;
3548
3549     hr = surface->surface_ops->surface_getdc(surface);
3550     if (FAILED(hr))
3551         return hr;
3552
3553     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3554             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3555     {
3556         /* GetDC on palettized formats is unsupported in D3D9, and the method
3557          * is missing in D3D8, so this should only be used for DX <=7
3558          * surfaces (with non-device palettes). */
3559         const PALETTEENTRY *pal = NULL;
3560
3561         if (surface->palette)
3562         {
3563             pal = surface->palette->palents;
3564         }
3565         else
3566         {
3567             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3568             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3569
3570             if (dds_primary && dds_primary->palette)
3571                 pal = dds_primary->palette->palents;
3572         }
3573
3574         if (pal)
3575         {
3576             RGBQUAD col[256];
3577             unsigned int i;
3578
3579             for (i = 0; i < 256; ++i)
3580             {
3581                 col[i].rgbRed = pal[i].peRed;
3582                 col[i].rgbGreen = pal[i].peGreen;
3583                 col[i].rgbBlue = pal[i].peBlue;
3584                 col[i].rgbReserved = 0;
3585             }
3586             SetDIBColorTable(surface->hDC, 0, 256, col);
3587         }
3588     }
3589
3590     surface->flags |= SFLAG_DCINUSE;
3591
3592     *dc = surface->hDC;
3593     TRACE("Returning dc %p.\n", *dc);
3594
3595     return WINED3D_OK;
3596 }
3597
3598 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3599 {
3600     TRACE("surface %p, dc %p.\n", surface, dc);
3601
3602     if (!(surface->flags & SFLAG_DCINUSE))
3603         return WINEDDERR_NODC;
3604
3605     if (surface->hDC != dc)
3606     {
3607         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3608                 dc, surface->hDC);
3609         return WINEDDERR_NODC;
3610     }
3611
3612     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3613     {
3614         /* Copy the contents of the DIB over to the PBO. */
3615         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->dib.bitmap_size);
3616     }
3617
3618     /* We locked first, so unlock now. */
3619     wined3d_surface_unmap(surface);
3620
3621     surface->flags &= ~SFLAG_DCINUSE;
3622
3623     return WINED3D_OK;
3624 }
3625
3626 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3627 {
3628     struct wined3d_swapchain *swapchain;
3629     HRESULT hr;
3630
3631     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3632
3633     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
3634     {
3635         ERR("Flipped surface is not on a swapchain.\n");
3636         return WINEDDERR_NOTFLIPPABLE;
3637     }
3638     swapchain = surface->container.u.swapchain;
3639
3640     hr = surface->surface_ops->surface_flip(surface, override);
3641     if (FAILED(hr))
3642         return hr;
3643
3644     /* Just overwrite the swapchain presentation interval. This is ok because
3645      * only ddraw apps can call Flip, and only d3d8 and d3d9 applications
3646      * specify the presentation interval. */
3647     if (!(flags & (WINEDDFLIP_NOVSYNC | WINEDDFLIP_INTERVAL2 | WINEDDFLIP_INTERVAL3 | WINEDDFLIP_INTERVAL4)))
3648         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_ONE;
3649     else if (flags & WINEDDFLIP_NOVSYNC)
3650         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
3651     else if (flags & WINEDDFLIP_INTERVAL2)
3652         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_TWO;
3653     else if (flags & WINEDDFLIP_INTERVAL3)
3654         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_THREE;
3655     else
3656         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_FOUR;
3657
3658     return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
3659 }
3660
3661 /* Do not call while under the GL lock. */
3662 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3663 {
3664     struct wined3d_device *device = surface->resource.device;
3665
3666     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3667
3668     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3669     {
3670         struct wined3d_texture *texture = surface->container.u.texture;
3671
3672         TRACE("Passing to container (%p).\n", texture);
3673         texture->texture_ops->texture_preload(texture, srgb);
3674     }
3675     else
3676     {
3677         struct wined3d_context *context = NULL;
3678
3679         TRACE("(%p) : About to load surface\n", surface);
3680
3681         if (!device->isInDraw) context = context_acquire(device, NULL);
3682
3683         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3684                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3685         {
3686             if (palette9_changed(surface))
3687             {
3688                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3689                 /* TODO: This is not necessarily needed with hw palettized texture support */
3690                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3691                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3692                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3693             }
3694         }
3695
3696         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3697
3698         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3699         {
3700             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3701             GLclampf tmp;
3702             tmp = 0.9f;
3703             ENTER_GL();
3704             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3705             LEAVE_GL();
3706         }
3707
3708         if (context) context_release(context);
3709     }
3710 }
3711
3712 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3713 {
3714     if (!surface->resource.allocatedMemory)
3715     {
3716         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3717                 surface->resource.size + RESOURCE_ALIGNMENT);
3718         if (!surface->resource.heapMemory)
3719         {
3720             ERR("Out of memory\n");
3721             return FALSE;
3722         }
3723         surface->resource.allocatedMemory =
3724             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3725     }
3726     else
3727     {
3728         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3729     }
3730
3731     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3732
3733     return TRUE;
3734 }
3735
3736 /* Read the framebuffer back into the surface */
3737 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3738 {
3739     struct wined3d_device *device = surface->resource.device;
3740     const struct wined3d_gl_info *gl_info;
3741     struct wined3d_context *context;
3742     BYTE *mem;
3743     GLint fmt;
3744     GLint type;
3745     BYTE *row, *top, *bottom;
3746     int i;
3747     BOOL bpp;
3748     RECT local_rect;
3749     BOOL srcIsUpsideDown;
3750     GLint rowLen = 0;
3751     GLint skipPix = 0;
3752     GLint skipRow = 0;
3753
3754     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3755         static BOOL warned = FALSE;
3756         if(!warned) {
3757             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3758             warned = TRUE;
3759         }
3760         return;
3761     }
3762
3763     context = context_acquire(device, surface);
3764     context_apply_blit_state(context, device);
3765     gl_info = context->gl_info;
3766
3767     ENTER_GL();
3768
3769     /* Select the correct read buffer, and give some debug output.
3770      * There is no need to keep track of the current read buffer or reset it, every part of the code
3771      * that reads sets the read buffer as desired.
3772      */
3773     if (surface_is_offscreen(surface))
3774     {
3775         /* Mapping the primary render target which is not on a swapchain.
3776          * Read from the back buffer. */
3777         TRACE("Mapping offscreen render target.\n");
3778         glReadBuffer(device->offscreenBuffer);
3779         srcIsUpsideDown = TRUE;
3780     }
3781     else
3782     {
3783         /* Onscreen surfaces are always part of a swapchain */
3784         GLenum buffer = surface_get_gl_buffer(surface);
3785         TRACE("Mapping %#x buffer.\n", buffer);
3786         glReadBuffer(buffer);
3787         checkGLcall("glReadBuffer");
3788         srcIsUpsideDown = FALSE;
3789     }
3790
3791     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3792     if (!rect)
3793     {
3794         local_rect.left = 0;
3795         local_rect.top = 0;
3796         local_rect.right = surface->resource.width;
3797         local_rect.bottom = surface->resource.height;
3798     }
3799     else
3800     {
3801         local_rect = *rect;
3802     }
3803     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3804
3805     switch (surface->resource.format->id)
3806     {
3807         case WINED3DFMT_P8_UINT:
3808         {
3809             if (primary_render_target_is_p8(device))
3810             {
3811                 /* In case of P8 render targets the index is stored in the alpha component */
3812                 fmt = GL_ALPHA;
3813                 type = GL_UNSIGNED_BYTE;
3814                 mem = dest;
3815                 bpp = surface->resource.format->byte_count;
3816             }
3817             else
3818             {
3819                 /* GL can't return palettized data, so read ARGB pixels into a
3820                  * separate block of memory and convert them into palettized format
3821                  * in software. Slow, but if the app means to use palettized render
3822                  * targets and locks it...
3823                  *
3824                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
3825                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
3826                  * for the color channels when palettizing the colors.
3827                  */
3828                 fmt = GL_RGB;
3829                 type = GL_UNSIGNED_BYTE;
3830                 pitch *= 3;
3831                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
3832                 if (!mem)
3833                 {
3834                     ERR("Out of memory\n");
3835                     LEAVE_GL();
3836                     return;
3837                 }
3838                 bpp = surface->resource.format->byte_count * 3;
3839             }
3840         }
3841         break;
3842
3843         default:
3844             mem = dest;
3845             fmt = surface->resource.format->glFormat;
3846             type = surface->resource.format->glType;
3847             bpp = surface->resource.format->byte_count;
3848     }
3849
3850     if (surface->flags & SFLAG_PBO)
3851     {
3852         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
3853         checkGLcall("glBindBufferARB");
3854         if (mem)
3855         {
3856             ERR("mem not null for pbo -- unexpected\n");
3857             mem = NULL;
3858         }
3859     }
3860
3861     /* Save old pixel store pack state */
3862     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
3863     checkGLcall("glGetIntegerv");
3864     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
3865     checkGLcall("glGetIntegerv");
3866     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
3867     checkGLcall("glGetIntegerv");
3868
3869     /* Setup pixel store pack state -- to glReadPixels into the correct place */
3870     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
3871     checkGLcall("glPixelStorei");
3872     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
3873     checkGLcall("glPixelStorei");
3874     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
3875     checkGLcall("glPixelStorei");
3876
3877     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
3878             local_rect.right - local_rect.left,
3879             local_rect.bottom - local_rect.top,
3880             fmt, type, mem);
3881     checkGLcall("glReadPixels");
3882
3883     /* Reset previous pixel store pack state */
3884     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
3885     checkGLcall("glPixelStorei");
3886     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
3887     checkGLcall("glPixelStorei");
3888     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
3889     checkGLcall("glPixelStorei");
3890
3891     if (surface->flags & SFLAG_PBO)
3892     {
3893         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
3894         checkGLcall("glBindBufferARB");
3895
3896         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
3897          * to get a pointer to it and perform the flipping in software. This is a lot
3898          * faster than calling glReadPixels for each line. In case we want more speed
3899          * we should rerender it flipped in a FBO and read the data back from the FBO. */
3900         if (!srcIsUpsideDown)
3901         {
3902             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
3903             checkGLcall("glBindBufferARB");
3904
3905             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
3906             checkGLcall("glMapBufferARB");
3907         }
3908     }
3909
3910     /* TODO: Merge this with the palettization loop below for P8 targets */
3911     if(!srcIsUpsideDown) {
3912         UINT len, off;
3913         /* glReadPixels returns the image upside down, and there is no way to prevent this.
3914             Flip the lines in software */
3915         len = (local_rect.right - local_rect.left) * bpp;
3916         off = local_rect.left * bpp;
3917
3918         row = HeapAlloc(GetProcessHeap(), 0, len);
3919         if(!row) {
3920             ERR("Out of memory\n");
3921             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
3922                 HeapFree(GetProcessHeap(), 0, mem);
3923             LEAVE_GL();
3924             return;
3925         }
3926
3927         top = mem + pitch * local_rect.top;
3928         bottom = mem + pitch * (local_rect.bottom - 1);
3929         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
3930             memcpy(row, top + off, len);
3931             memcpy(top + off, bottom + off, len);
3932             memcpy(bottom + off, row, len);
3933             top += pitch;
3934             bottom -= pitch;
3935         }
3936         HeapFree(GetProcessHeap(), 0, row);
3937
3938         /* Unmap the temp PBO buffer */
3939         if (surface->flags & SFLAG_PBO)
3940         {
3941             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
3942             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
3943         }
3944     }
3945
3946     LEAVE_GL();
3947     context_release(context);
3948
3949     /* For P8 textures we need to perform an inverse palette lookup. This is
3950      * done by searching for a palette index which matches the RGB value.
3951      * Note this isn't guaranteed to work when there are multiple entries for
3952      * the same color but we have no choice. In case of P8 render targets,
3953      * the index is stored in the alpha component so no conversion is needed. */
3954     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
3955     {
3956         const PALETTEENTRY *pal = NULL;
3957         DWORD width = pitch / 3;
3958         int x, y, c;
3959
3960         if (surface->palette)
3961         {
3962             pal = surface->palette->palents;
3963         }
3964         else
3965         {
3966             ERR("Palette is missing, cannot perform inverse palette lookup\n");
3967             HeapFree(GetProcessHeap(), 0, mem);
3968             return;
3969         }
3970
3971         for(y = local_rect.top; y < local_rect.bottom; y++) {
3972             for(x = local_rect.left; x < local_rect.right; x++) {
3973                 /*                      start              lines            pixels      */
3974                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
3975                 const BYTE *green = blue  + 1;
3976                 const BYTE *red = green + 1;
3977
3978                 for(c = 0; c < 256; c++) {
3979                     if(*red   == pal[c].peRed   &&
3980                        *green == pal[c].peGreen &&
3981                        *blue  == pal[c].peBlue)
3982                     {
3983                         *((BYTE *) dest + y * width + x) = c;
3984                         break;
3985                     }
3986                 }
3987             }
3988         }
3989         HeapFree(GetProcessHeap(), 0, mem);
3990     }
3991 }
3992
3993 /* Read the framebuffer contents into a texture */
3994 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
3995 {
3996     struct wined3d_device *device = surface->resource.device;
3997     const struct wined3d_gl_info *gl_info;
3998     struct wined3d_context *context;
3999
4000     if (!surface_is_offscreen(surface))
4001     {
4002         /* We would need to flip onscreen surfaces, but there's no efficient
4003          * way to do that here. It makes more sense for the caller to
4004          * explicitly go through sysmem. */
4005         ERR("Not supported for onscreen targets.\n");
4006         return;
4007     }
4008
4009     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
4010      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
4011      * states in the stateblock, and no driver was found yet that had bugs in that regard.
4012      */
4013     context = context_acquire(device, surface);
4014     gl_info = context->gl_info;
4015     device_invalidate_state(device, STATE_FRAMEBUFFER);
4016
4017     surface_prepare_texture(surface, gl_info, srgb);
4018     surface_bind_and_dirtify(surface, gl_info, srgb);
4019
4020     TRACE("Reading back offscreen render target %p.\n", surface);
4021
4022     ENTER_GL();
4023
4024     glReadBuffer(device->offscreenBuffer);
4025     checkGLcall("glReadBuffer");
4026
4027     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4028             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4029     checkGLcall("glCopyTexSubImage2D");
4030
4031     LEAVE_GL();
4032
4033     context_release(context);
4034 }
4035
4036 /* Context activation is done by the caller. */
4037 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4038         const struct wined3d_gl_info *gl_info, BOOL srgb)
4039 {
4040     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4041     CONVERT_TYPES convert;
4042     struct wined3d_format format;
4043
4044     if (surface->flags & alloc_flag) return;
4045
4046     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4047     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4048     else surface->flags &= ~SFLAG_CONVERTED;
4049
4050     surface_bind_and_dirtify(surface, gl_info, srgb);
4051     surface_allocate_surface(surface, gl_info, &format, srgb);
4052     surface->flags |= alloc_flag;
4053 }
4054
4055 /* Context activation is done by the caller. */
4056 void surface_prepare_texture(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
4057 {
4058     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4059     {
4060         struct wined3d_texture *texture = surface->container.u.texture;
4061         UINT sub_count = texture->level_count * texture->layer_count;
4062         UINT i;
4063
4064         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4065
4066         for (i = 0; i < sub_count; ++i)
4067         {
4068             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4069             surface_prepare_texture_internal(s, gl_info, srgb);
4070         }
4071
4072         return;
4073     }
4074
4075     surface_prepare_texture_internal(surface, gl_info, srgb);
4076 }
4077
4078 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4079         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4080 {
4081     struct wined3d_device *device = surface->resource.device;
4082     UINT pitch = wined3d_surface_get_pitch(surface);
4083     const struct wined3d_gl_info *gl_info;
4084     struct wined3d_context *context;
4085     RECT local_rect;
4086     UINT w, h;
4087
4088     surface_get_rect(surface, rect, &local_rect);
4089
4090     mem += local_rect.top * pitch + local_rect.left * bpp;
4091     w = local_rect.right - local_rect.left;
4092     h = local_rect.bottom - local_rect.top;
4093
4094     /* Activate the correct context for the render target */
4095     context = context_acquire(device, surface);
4096     context_apply_blit_state(context, device);
4097     gl_info = context->gl_info;
4098
4099     ENTER_GL();
4100
4101     if (!surface_is_offscreen(surface))
4102     {
4103         GLenum buffer = surface_get_gl_buffer(surface);
4104         TRACE("Unlocking %#x buffer.\n", buffer);
4105         context_set_draw_buffer(context, buffer);
4106
4107         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4108         glPixelZoom(1.0f, -1.0f);
4109     }
4110     else
4111     {
4112         /* Primary offscreen render target */
4113         TRACE("Offscreen render target.\n");
4114         context_set_draw_buffer(context, device->offscreenBuffer);
4115
4116         glPixelZoom(1.0f, 1.0f);
4117     }
4118
4119     glRasterPos3i(local_rect.left, local_rect.top, 1);
4120     checkGLcall("glRasterPos3i");
4121
4122     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4123     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4124
4125     if (surface->flags & SFLAG_PBO)
4126     {
4127         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4128         checkGLcall("glBindBufferARB");
4129     }
4130
4131     glDrawPixels(w, h, fmt, type, mem);
4132     checkGLcall("glDrawPixels");
4133
4134     if (surface->flags & SFLAG_PBO)
4135     {
4136         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4137         checkGLcall("glBindBufferARB");
4138     }
4139
4140     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4141     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4142
4143     LEAVE_GL();
4144
4145     if (wined3d_settings.strict_draw_ordering
4146             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4147             && surface->container.u.swapchain->front_buffer == surface))
4148         wglFlush();
4149
4150     context_release(context);
4151 }
4152
4153 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4154         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4155 {
4156     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4157     const struct wined3d_device *device = surface->resource.device;
4158     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4159     BOOL blit_supported = FALSE;
4160
4161     /* Copy the default values from the surface. Below we might perform fixups */
4162     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4163     *format = *surface->resource.format;
4164     *convert = NO_CONVERSION;
4165
4166     /* Ok, now look if we have to do any conversion */
4167     switch (surface->resource.format->id)
4168     {
4169         case WINED3DFMT_P8_UINT:
4170             /* Below the call to blit_supported is disabled for Wine 1.2
4171              * because the function isn't operating correctly yet. At the
4172              * moment 8-bit blits are handled in software and if certain GL
4173              * extensions are around, surface conversion is performed at
4174              * upload time. The blit_supported call recognizes it as a
4175              * destination fixup. This type of upload 'fixup' and 8-bit to
4176              * 8-bit blits need to be handled by the blit_shader.
4177              * TODO: get rid of this #if 0. */
4178 #if 0
4179             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4180                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4181                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4182 #endif
4183             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4184
4185             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4186              * texturing. Further also use conversion in case of color keying.
4187              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4188              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4189              * conflicts with this.
4190              */
4191             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4192                     || colorkey_active || !use_texturing)
4193             {
4194                 format->glFormat = GL_RGBA;
4195                 format->glInternal = GL_RGBA;
4196                 format->glType = GL_UNSIGNED_BYTE;
4197                 format->conv_byte_count = 4;
4198                 if (colorkey_active)
4199                     *convert = CONVERT_PALETTED_CK;
4200                 else
4201                     *convert = CONVERT_PALETTED;
4202             }
4203             break;
4204
4205         case WINED3DFMT_B2G3R3_UNORM:
4206             /* **********************
4207                 GL_UNSIGNED_BYTE_3_3_2
4208                 ********************** */
4209             if (colorkey_active) {
4210                 /* This texture format will never be used.. So do not care about color keying
4211                     up until the point in time it will be needed :-) */
4212                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4213             }
4214             break;
4215
4216         case WINED3DFMT_B5G6R5_UNORM:
4217             if (colorkey_active)
4218             {
4219                 *convert = CONVERT_CK_565;
4220                 format->glFormat = GL_RGBA;
4221                 format->glInternal = GL_RGB5_A1;
4222                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4223                 format->conv_byte_count = 2;
4224             }
4225             break;
4226
4227         case WINED3DFMT_B5G5R5X1_UNORM:
4228             if (colorkey_active)
4229             {
4230                 *convert = CONVERT_CK_5551;
4231                 format->glFormat = GL_BGRA;
4232                 format->glInternal = GL_RGB5_A1;
4233                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4234                 format->conv_byte_count = 2;
4235             }
4236             break;
4237
4238         case WINED3DFMT_B8G8R8_UNORM:
4239             if (colorkey_active)
4240             {
4241                 *convert = CONVERT_CK_RGB24;
4242                 format->glFormat = GL_RGBA;
4243                 format->glInternal = GL_RGBA8;
4244                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4245                 format->conv_byte_count = 4;
4246             }
4247             break;
4248
4249         case WINED3DFMT_B8G8R8X8_UNORM:
4250             if (colorkey_active)
4251             {
4252                 *convert = CONVERT_RGB32_888;
4253                 format->glFormat = GL_RGBA;
4254                 format->glInternal = GL_RGBA8;
4255                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4256                 format->conv_byte_count = 4;
4257             }
4258             break;
4259
4260         default:
4261             break;
4262     }
4263
4264     return WINED3D_OK;
4265 }
4266
4267 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4268 {
4269     const struct wined3d_device *device = surface->resource.device;
4270     const struct wined3d_palette *pal = surface->palette;
4271     BOOL index_in_alpha = FALSE;
4272     unsigned int i;
4273
4274     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4275      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4276      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4277      * duplicate entries. Store the color key in the unused alpha component to speed the
4278      * download up and to make conversion unneeded. */
4279     index_in_alpha = primary_render_target_is_p8(device);
4280
4281     if (!pal)
4282     {
4283         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4284         if (device->wined3d->flags & WINED3D_PALETTE_PER_SURFACE)
4285         {
4286             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4287             if (index_in_alpha)
4288             {
4289                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4290                  * there's no palette at this time. */
4291                 for (i = 0; i < 256; i++) table[i][3] = i;
4292             }
4293         }
4294         else
4295         {
4296             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4297              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4298              * capability flag is present (wine does advertise this capability) */
4299             for (i = 0; i < 256; ++i)
4300             {
4301                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4302                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4303                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4304                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4305             }
4306         }
4307     }
4308     else
4309     {
4310         TRACE("Using surface palette %p\n", pal);
4311         /* Get the surface's palette */
4312         for (i = 0; i < 256; ++i)
4313         {
4314             table[i][0] = pal->palents[i].peRed;
4315             table[i][1] = pal->palents[i].peGreen;
4316             table[i][2] = pal->palents[i].peBlue;
4317
4318             /* When index_in_alpha is set the palette index is stored in the
4319              * alpha component. In case of a readback we can then read
4320              * GL_ALPHA. Color keying is handled in BltOverride using a
4321              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4322              * color key itself is passed to glAlphaFunc in other cases the
4323              * alpha component of pixels that should be masked away is set to 0. */
4324             if (index_in_alpha)
4325             {
4326                 table[i][3] = i;
4327             }
4328             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4329                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4330             {
4331                 table[i][3] = 0x00;
4332             }
4333             else if (pal->flags & WINEDDPCAPS_ALPHA)
4334             {
4335                 table[i][3] = pal->palents[i].peFlags;
4336             }
4337             else
4338             {
4339                 table[i][3] = 0xFF;
4340             }
4341         }
4342     }
4343 }
4344
4345 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4346         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4347 {
4348     const BYTE *source;
4349     BYTE *dest;
4350     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4351
4352     switch (convert) {
4353         case NO_CONVERSION:
4354         {
4355             memcpy(dst, src, pitch * height);
4356             break;
4357         }
4358         case CONVERT_PALETTED:
4359         case CONVERT_PALETTED_CK:
4360         {
4361             BYTE table[256][4];
4362             unsigned int x, y;
4363
4364             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4365
4366             for (y = 0; y < height; y++)
4367             {
4368                 source = src + pitch * y;
4369                 dest = dst + outpitch * y;
4370                 /* This is an 1 bpp format, using the width here is fine */
4371                 for (x = 0; x < width; x++) {
4372                     BYTE color = *source++;
4373                     *dest++ = table[color][0];
4374                     *dest++ = table[color][1];
4375                     *dest++ = table[color][2];
4376                     *dest++ = table[color][3];
4377                 }
4378             }
4379         }
4380         break;
4381
4382         case CONVERT_CK_565:
4383         {
4384             /* Converting the 565 format in 5551 packed to emulate color-keying.
4385
4386               Note : in all these conversion, it would be best to average the averaging
4387                       pixels to get the color of the pixel that will be color-keyed to
4388                       prevent 'color bleeding'. This will be done later on if ever it is
4389                       too visible.
4390
4391               Note2: Nvidia documents say that their driver does not support alpha + color keying
4392                      on the same surface and disables color keying in such a case
4393             */
4394             unsigned int x, y;
4395             const WORD *Source;
4396             WORD *Dest;
4397
4398             TRACE("Color keyed 565\n");
4399
4400             for (y = 0; y < height; y++) {
4401                 Source = (const WORD *)(src + y * pitch);
4402                 Dest = (WORD *) (dst + y * outpitch);
4403                 for (x = 0; x < width; x++ ) {
4404                     WORD color = *Source++;
4405                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4406                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4407                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4408                         *Dest |= 0x0001;
4409                     Dest++;
4410                 }
4411             }
4412         }
4413         break;
4414
4415         case CONVERT_CK_5551:
4416         {
4417             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4418             unsigned int x, y;
4419             const WORD *Source;
4420             WORD *Dest;
4421             TRACE("Color keyed 5551\n");
4422             for (y = 0; y < height; y++) {
4423                 Source = (const WORD *)(src + y * pitch);
4424                 Dest = (WORD *) (dst + y * outpitch);
4425                 for (x = 0; x < width; x++ ) {
4426                     WORD color = *Source++;
4427                     *Dest = color;
4428                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4429                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4430                         *Dest |= (1 << 15);
4431                     else
4432                         *Dest &= ~(1 << 15);
4433                     Dest++;
4434                 }
4435             }
4436         }
4437         break;
4438
4439         case CONVERT_CK_RGB24:
4440         {
4441             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4442             unsigned int x, y;
4443             for (y = 0; y < height; y++)
4444             {
4445                 source = src + pitch * y;
4446                 dest = dst + outpitch * y;
4447                 for (x = 0; x < width; x++) {
4448                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4449                     DWORD dstcolor = color << 8;
4450                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4451                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4452                         dstcolor |= 0xff;
4453                     *(DWORD*)dest = dstcolor;
4454                     source += 3;
4455                     dest += 4;
4456                 }
4457             }
4458         }
4459         break;
4460
4461         case CONVERT_RGB32_888:
4462         {
4463             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4464             unsigned int x, y;
4465             for (y = 0; y < height; y++)
4466             {
4467                 source = src + pitch * y;
4468                 dest = dst + outpitch * y;
4469                 for (x = 0; x < width; x++) {
4470                     DWORD color = 0xffffff & *(const DWORD*)source;
4471                     DWORD dstcolor = color << 8;
4472                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4473                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4474                         dstcolor |= 0xff;
4475                     *(DWORD*)dest = dstcolor;
4476                     source += 4;
4477                     dest += 4;
4478                 }
4479             }
4480         }
4481         break;
4482
4483         default:
4484             ERR("Unsupported conversion type %#x.\n", convert);
4485     }
4486     return WINED3D_OK;
4487 }
4488
4489 BOOL palette9_changed(struct wined3d_surface *surface)
4490 {
4491     struct wined3d_device *device = surface->resource.device;
4492
4493     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4494             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4495     {
4496         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4497          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4498          */
4499         return FALSE;
4500     }
4501
4502     if (surface->palette9)
4503     {
4504         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4505         {
4506             return FALSE;
4507         }
4508     }
4509     else
4510     {
4511         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4512     }
4513     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4514
4515     return TRUE;
4516 }
4517
4518 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4519 {
4520     /* Flip the surface contents */
4521     /* Flip the DC */
4522     {
4523         HDC tmp;
4524         tmp = front->hDC;
4525         front->hDC = back->hDC;
4526         back->hDC = tmp;
4527     }
4528
4529     /* Flip the DIBsection */
4530     {
4531         HBITMAP tmp;
4532         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4533         tmp = front->dib.DIBsection;
4534         front->dib.DIBsection = back->dib.DIBsection;
4535         back->dib.DIBsection = tmp;
4536
4537         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4538         else front->flags &= ~SFLAG_DIBSECTION;
4539         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4540         else back->flags &= ~SFLAG_DIBSECTION;
4541     }
4542
4543     /* Flip the surface data */
4544     {
4545         void* tmp;
4546
4547         tmp = front->dib.bitmap_data;
4548         front->dib.bitmap_data = back->dib.bitmap_data;
4549         back->dib.bitmap_data = tmp;
4550
4551         tmp = front->resource.allocatedMemory;
4552         front->resource.allocatedMemory = back->resource.allocatedMemory;
4553         back->resource.allocatedMemory = tmp;
4554
4555         tmp = front->resource.heapMemory;
4556         front->resource.heapMemory = back->resource.heapMemory;
4557         back->resource.heapMemory = tmp;
4558     }
4559
4560     /* Flip the PBO */
4561     {
4562         GLuint tmp_pbo = front->pbo;
4563         front->pbo = back->pbo;
4564         back->pbo = tmp_pbo;
4565     }
4566
4567     /* client_memory should not be different, but just in case */
4568     {
4569         BOOL tmp;
4570         tmp = front->dib.client_memory;
4571         front->dib.client_memory = back->dib.client_memory;
4572         back->dib.client_memory = tmp;
4573     }
4574
4575     /* Flip the opengl texture */
4576     {
4577         GLuint tmp;
4578
4579         tmp = back->texture_name;
4580         back->texture_name = front->texture_name;
4581         front->texture_name = tmp;
4582
4583         tmp = back->texture_name_srgb;
4584         back->texture_name_srgb = front->texture_name_srgb;
4585         front->texture_name_srgb = tmp;
4586
4587         resource_unload(&back->resource);
4588         resource_unload(&front->resource);
4589     }
4590
4591     {
4592         DWORD tmp_flags = back->flags;
4593         back->flags = front->flags;
4594         front->flags = tmp_flags;
4595     }
4596 }
4597
4598 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4599  * pixel copy calls. */
4600 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4601         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4602 {
4603     struct wined3d_device *device = dst_surface->resource.device;
4604     float xrel, yrel;
4605     UINT row;
4606     struct wined3d_context *context;
4607     BOOL upsidedown = FALSE;
4608     RECT dst_rect = *dst_rect_in;
4609
4610     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4611      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4612      */
4613     if(dst_rect.top > dst_rect.bottom) {
4614         UINT tmp = dst_rect.bottom;
4615         dst_rect.bottom = dst_rect.top;
4616         dst_rect.top = tmp;
4617         upsidedown = TRUE;
4618     }
4619
4620     context = context_acquire(device, src_surface);
4621     context_apply_blit_state(context, device);
4622     surface_internal_preload(dst_surface, SRGB_RGB);
4623     ENTER_GL();
4624
4625     /* Bind the target texture */
4626     glBindTexture(dst_surface->texture_target, dst_surface->texture_name);
4627     checkGLcall("glBindTexture");
4628     if (surface_is_offscreen(src_surface))
4629     {
4630         TRACE("Reading from an offscreen target\n");
4631         upsidedown = !upsidedown;
4632         glReadBuffer(device->offscreenBuffer);
4633     }
4634     else
4635     {
4636         glReadBuffer(surface_get_gl_buffer(src_surface));
4637     }
4638     checkGLcall("glReadBuffer");
4639
4640     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4641     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4642
4643     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4644     {
4645         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4646
4647         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4648             ERR("Texture filtering not supported in direct blit\n");
4649         }
4650     }
4651     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4652             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4653     {
4654         ERR("Texture filtering not supported in direct blit\n");
4655     }
4656
4657     if (upsidedown
4658             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4659             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4660     {
4661         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4662
4663         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4664                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4665                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4666                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4667     }
4668     else
4669     {
4670         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4671         /* I have to process this row by row to swap the image,
4672          * otherwise it would be upside down, so stretching in y direction
4673          * doesn't cost extra time
4674          *
4675          * However, stretching in x direction can be avoided if not necessary
4676          */
4677         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4678             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4679             {
4680                 /* Well, that stuff works, but it's very slow.
4681                  * find a better way instead
4682                  */
4683                 UINT col;
4684
4685                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4686                 {
4687                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4688                             dst_rect.left + col /* x offset */, row /* y offset */,
4689                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4690                 }
4691             }
4692             else
4693             {
4694                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4695                         dst_rect.left /* x offset */, row /* y offset */,
4696                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4697             }
4698         }
4699     }
4700     checkGLcall("glCopyTexSubImage2D");
4701
4702     LEAVE_GL();
4703     context_release(context);
4704
4705     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4706      * path is never entered
4707      */
4708     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4709 }
4710
4711 /* Uses the hardware to stretch and flip the image */
4712 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4713         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4714 {
4715     struct wined3d_device *device = dst_surface->resource.device;
4716     struct wined3d_swapchain *src_swapchain = NULL;
4717     GLuint src, backup = 0;
4718     float left, right, top, bottom; /* Texture coordinates */
4719     UINT fbwidth = src_surface->resource.width;
4720     UINT fbheight = src_surface->resource.height;
4721     struct wined3d_context *context;
4722     GLenum drawBuffer = GL_BACK;
4723     GLenum texture_target;
4724     BOOL noBackBufferBackup;
4725     BOOL src_offscreen;
4726     BOOL upsidedown = FALSE;
4727     RECT dst_rect = *dst_rect_in;
4728
4729     TRACE("Using hwstretch blit\n");
4730     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4731     context = context_acquire(device, src_surface);
4732     context_apply_blit_state(context, device);
4733     surface_internal_preload(dst_surface, SRGB_RGB);
4734
4735     src_offscreen = surface_is_offscreen(src_surface);
4736     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4737     if (!noBackBufferBackup && !src_surface->texture_name)
4738     {
4739         /* Get it a description */
4740         surface_internal_preload(src_surface, SRGB_RGB);
4741     }
4742     ENTER_GL();
4743
4744     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4745      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4746      */
4747     if (context->aux_buffers >= 2)
4748     {
4749         /* Got more than one aux buffer? Use the 2nd aux buffer */
4750         drawBuffer = GL_AUX1;
4751     }
4752     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4753     {
4754         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4755         drawBuffer = GL_AUX0;
4756     }
4757
4758     if(noBackBufferBackup) {
4759         glGenTextures(1, &backup);
4760         checkGLcall("glGenTextures");
4761         glBindTexture(GL_TEXTURE_2D, backup);
4762         checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4763         texture_target = GL_TEXTURE_2D;
4764     } else {
4765         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4766          * we are reading from the back buffer, the backup can be used as source texture
4767          */
4768         texture_target = src_surface->texture_target;
4769         glBindTexture(texture_target, src_surface->texture_name);
4770         checkGLcall("glBindTexture(texture_target, src_surface->texture_name)");
4771         glEnable(texture_target);
4772         checkGLcall("glEnable(texture_target)");
4773
4774         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4775         src_surface->flags &= ~SFLAG_INTEXTURE;
4776     }
4777
4778     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4779      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4780      */
4781     if(dst_rect.top > dst_rect.bottom) {
4782         UINT tmp = dst_rect.bottom;
4783         dst_rect.bottom = dst_rect.top;
4784         dst_rect.top = tmp;
4785         upsidedown = TRUE;
4786     }
4787
4788     if (src_offscreen)
4789     {
4790         TRACE("Reading from an offscreen target\n");
4791         upsidedown = !upsidedown;
4792         glReadBuffer(device->offscreenBuffer);
4793     }
4794     else
4795     {
4796         glReadBuffer(surface_get_gl_buffer(src_surface));
4797     }
4798
4799     /* TODO: Only back up the part that will be overwritten */
4800     glCopyTexSubImage2D(texture_target, 0,
4801                         0, 0 /* read offsets */,
4802                         0, 0,
4803                         fbwidth,
4804                         fbheight);
4805
4806     checkGLcall("glCopyTexSubImage2D");
4807
4808     /* No issue with overriding these - the sampler is dirty due to blit usage */
4809     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4810             wined3d_gl_mag_filter(magLookup, Filter));
4811     checkGLcall("glTexParameteri");
4812     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
4813             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
4814     checkGLcall("glTexParameteri");
4815
4816     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4817         src_swapchain = src_surface->container.u.swapchain;
4818     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
4819     {
4820         src = backup ? backup : src_surface->texture_name;
4821     }
4822     else
4823     {
4824         glReadBuffer(GL_FRONT);
4825         checkGLcall("glReadBuffer(GL_FRONT)");
4826
4827         glGenTextures(1, &src);
4828         checkGLcall("glGenTextures(1, &src)");
4829         glBindTexture(GL_TEXTURE_2D, src);
4830         checkGLcall("glBindTexture(GL_TEXTURE_2D, src)");
4831
4832         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
4833          * out for power of 2 sizes
4834          */
4835         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
4836                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
4837         checkGLcall("glTexImage2D");
4838         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
4839                             0, 0 /* read offsets */,
4840                             0, 0,
4841                             fbwidth,
4842                             fbheight);
4843
4844         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
4845         checkGLcall("glTexParameteri");
4846         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
4847         checkGLcall("glTexParameteri");
4848
4849         glReadBuffer(GL_BACK);
4850         checkGLcall("glReadBuffer(GL_BACK)");
4851
4852         if(texture_target != GL_TEXTURE_2D) {
4853             glDisable(texture_target);
4854             glEnable(GL_TEXTURE_2D);
4855             texture_target = GL_TEXTURE_2D;
4856         }
4857     }
4858     checkGLcall("glEnd and previous");
4859
4860     left = src_rect->left;
4861     right = src_rect->right;
4862
4863     if (!upsidedown)
4864     {
4865         top = src_surface->resource.height - src_rect->top;
4866         bottom = src_surface->resource.height - src_rect->bottom;
4867     }
4868     else
4869     {
4870         top = src_surface->resource.height - src_rect->bottom;
4871         bottom = src_surface->resource.height - src_rect->top;
4872     }
4873
4874     if (src_surface->flags & SFLAG_NORMCOORD)
4875     {
4876         left /= src_surface->pow2Width;
4877         right /= src_surface->pow2Width;
4878         top /= src_surface->pow2Height;
4879         bottom /= src_surface->pow2Height;
4880     }
4881
4882     /* draw the source texture stretched and upside down. The correct surface is bound already */
4883     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
4884     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
4885
4886     context_set_draw_buffer(context, drawBuffer);
4887     glReadBuffer(drawBuffer);
4888
4889     glBegin(GL_QUADS);
4890         /* bottom left */
4891         glTexCoord2f(left, bottom);
4892         glVertex2i(0, 0);
4893
4894         /* top left */
4895         glTexCoord2f(left, top);
4896         glVertex2i(0, dst_rect.bottom - dst_rect.top);
4897
4898         /* top right */
4899         glTexCoord2f(right, top);
4900         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4901
4902         /* bottom right */
4903         glTexCoord2f(right, bottom);
4904         glVertex2i(dst_rect.right - dst_rect.left, 0);
4905     glEnd();
4906     checkGLcall("glEnd and previous");
4907
4908     if (texture_target != dst_surface->texture_target)
4909     {
4910         glDisable(texture_target);
4911         glEnable(dst_surface->texture_target);
4912         texture_target = dst_surface->texture_target;
4913     }
4914
4915     /* Now read the stretched and upside down image into the destination texture */
4916     glBindTexture(texture_target, dst_surface->texture_name);
4917     checkGLcall("glBindTexture");
4918     glCopyTexSubImage2D(texture_target,
4919                         0,
4920                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
4921                         0, 0, /* We blitted the image to the origin */
4922                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4923     checkGLcall("glCopyTexSubImage2D");
4924
4925     if(drawBuffer == GL_BACK) {
4926         /* Write the back buffer backup back */
4927         if(backup) {
4928             if(texture_target != GL_TEXTURE_2D) {
4929                 glDisable(texture_target);
4930                 glEnable(GL_TEXTURE_2D);
4931                 texture_target = GL_TEXTURE_2D;
4932             }
4933             glBindTexture(GL_TEXTURE_2D, backup);
4934             checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4935         }
4936         else
4937         {
4938             if (texture_target != src_surface->texture_target)
4939             {
4940                 glDisable(texture_target);
4941                 glEnable(src_surface->texture_target);
4942                 texture_target = src_surface->texture_target;
4943             }
4944             glBindTexture(src_surface->texture_target, src_surface->texture_name);
4945             checkGLcall("glBindTexture(src_surface->texture_target, src_surface->texture_name)");
4946         }
4947
4948         glBegin(GL_QUADS);
4949             /* top left */
4950             glTexCoord2f(0.0f, 0.0f);
4951             glVertex2i(0, fbheight);
4952
4953             /* bottom left */
4954             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
4955             glVertex2i(0, 0);
4956
4957             /* bottom right */
4958             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
4959                     (float)fbheight / (float)src_surface->pow2Height);
4960             glVertex2i(fbwidth, 0);
4961
4962             /* top right */
4963             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
4964             glVertex2i(fbwidth, fbheight);
4965         glEnd();
4966     }
4967     glDisable(texture_target);
4968     checkGLcall("glDisable(texture_target)");
4969
4970     /* Cleanup */
4971     if (src != src_surface->texture_name && src != backup)
4972     {
4973         glDeleteTextures(1, &src);
4974         checkGLcall("glDeleteTextures(1, &src)");
4975     }
4976     if(backup) {
4977         glDeleteTextures(1, &backup);
4978         checkGLcall("glDeleteTextures(1, &backup)");
4979     }
4980
4981     LEAVE_GL();
4982
4983     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
4984
4985     context_release(context);
4986
4987     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4988      * path is never entered
4989      */
4990     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4991 }
4992
4993 /* Front buffer coordinates are always full screen coordinates, but our GL
4994  * drawable is limited to the window's client area. The sysmem and texture
4995  * copies do have the full screen size. Note that GL has a bottom-left
4996  * origin, while D3D has a top-left origin. */
4997 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
4998 {
4999     UINT drawable_height;
5000
5001     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5002             && surface == surface->container.u.swapchain->front_buffer)
5003     {
5004         POINT offset = {0, 0};
5005         RECT windowsize;
5006
5007         ScreenToClient(window, &offset);
5008         OffsetRect(rect, offset.x, offset.y);
5009
5010         GetClientRect(window, &windowsize);
5011         drawable_height = windowsize.bottom - windowsize.top;
5012     }
5013     else
5014     {
5015         drawable_height = surface->resource.height;
5016     }
5017
5018     rect->top = drawable_height - rect->top;
5019     rect->bottom = drawable_height - rect->bottom;
5020 }
5021
5022 /* blit between surface locations. onscreen on different swapchains is not supported.
5023  * depth / stencil is not supported. */
5024 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
5025         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
5026         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
5027 {
5028     const struct wined3d_gl_info *gl_info;
5029     struct wined3d_context *context;
5030     RECT src_rect, dst_rect;
5031     GLenum gl_filter;
5032     GLenum buffer;
5033
5034     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
5035     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
5036             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
5037     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
5038             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
5039
5040     src_rect = *src_rect_in;
5041     dst_rect = *dst_rect_in;
5042
5043     switch (filter)
5044     {
5045         case WINED3DTEXF_LINEAR:
5046             gl_filter = GL_LINEAR;
5047             break;
5048
5049         default:
5050             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
5051         case WINED3DTEXF_NONE:
5052         case WINED3DTEXF_POINT:
5053             gl_filter = GL_NEAREST;
5054             break;
5055     }
5056
5057     if (src_location == SFLAG_INDRAWABLE && surface_is_offscreen(src_surface))
5058         src_location = SFLAG_INTEXTURE;
5059     if (dst_location == SFLAG_INDRAWABLE && surface_is_offscreen(dst_surface))
5060         dst_location = SFLAG_INTEXTURE;
5061
5062     /* Make sure the locations are up-to-date. Loading the destination
5063      * surface isn't required if the entire surface is overwritten. (And is
5064      * in fact harmful if we're being called by surface_load_location() with
5065      * the purpose of loading the destination surface.) */
5066     surface_load_location(src_surface, src_location, NULL);
5067     if (!surface_is_full_rect(dst_surface, &dst_rect))
5068         surface_load_location(dst_surface, dst_location, NULL);
5069
5070     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
5071     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
5072     else context = context_acquire(device, NULL);
5073
5074     if (!context->valid)
5075     {
5076         context_release(context);
5077         WARN("Invalid context, skipping blit.\n");
5078         return;
5079     }
5080
5081     gl_info = context->gl_info;
5082
5083     if (src_location == SFLAG_INDRAWABLE)
5084     {
5085         TRACE("Source surface %p is onscreen.\n", src_surface);
5086         buffer = surface_get_gl_buffer(src_surface);
5087         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
5088     }
5089     else
5090     {
5091         TRACE("Source surface %p is offscreen.\n", src_surface);
5092         buffer = GL_COLOR_ATTACHMENT0;
5093     }
5094
5095     ENTER_GL();
5096     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
5097     glReadBuffer(buffer);
5098     checkGLcall("glReadBuffer()");
5099     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
5100     LEAVE_GL();
5101
5102     if (dst_location == SFLAG_INDRAWABLE)
5103     {
5104         TRACE("Destination surface %p is onscreen.\n", dst_surface);
5105         buffer = surface_get_gl_buffer(dst_surface);
5106         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5107     }
5108     else
5109     {
5110         TRACE("Destination surface %p is offscreen.\n", dst_surface);
5111         buffer = GL_COLOR_ATTACHMENT0;
5112     }
5113
5114     ENTER_GL();
5115     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
5116     context_set_draw_buffer(context, buffer);
5117     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
5118     context_invalidate_state(context, STATE_FRAMEBUFFER);
5119
5120     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
5121     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
5122     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
5123     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
5124     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
5125
5126     glDisable(GL_SCISSOR_TEST);
5127     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
5128
5129     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
5130             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
5131     checkGLcall("glBlitFramebuffer()");
5132
5133     LEAVE_GL();
5134
5135     if (wined3d_settings.strict_draw_ordering
5136             || (dst_location == SFLAG_INDRAWABLE
5137             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
5138         wglFlush();
5139
5140     context_release(context);
5141 }
5142
5143 static void surface_blt_to_drawable(struct wined3d_device *device,
5144         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5145         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5146         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5147 {
5148     struct wined3d_context *context;
5149     RECT src_rect, dst_rect;
5150
5151     src_rect = *src_rect_in;
5152     dst_rect = *dst_rect_in;
5153
5154     /* Make sure the surface is up-to-date. This should probably use
5155      * surface_load_location() and worry about the destination surface too,
5156      * unless we're overwriting it completely. */
5157     surface_internal_preload(src_surface, SRGB_RGB);
5158
5159     /* Activate the destination context, set it up for blitting */
5160     context = context_acquire(device, dst_surface);
5161     context_apply_blit_state(context, device);
5162
5163     if (!surface_is_offscreen(dst_surface))
5164         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5165
5166     device->blitter->set_shader(device->blit_priv, context->gl_info, src_surface);
5167
5168     ENTER_GL();
5169
5170     if (color_key)
5171     {
5172         glEnable(GL_ALPHA_TEST);
5173         checkGLcall("glEnable(GL_ALPHA_TEST)");
5174
5175         /* When the primary render target uses P8, the alpha component
5176          * contains the palette index. Which means that the colorkey is one of
5177          * the palette entries. In other cases pixels that should be masked
5178          * away have alpha set to 0. */
5179         if (primary_render_target_is_p8(device))
5180             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5181         else
5182             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5183         checkGLcall("glAlphaFunc");
5184     }
5185     else
5186     {
5187         glDisable(GL_ALPHA_TEST);
5188         checkGLcall("glDisable(GL_ALPHA_TEST)");
5189     }
5190
5191     draw_textured_quad(src_surface, &src_rect, &dst_rect, filter);
5192
5193     if (color_key)
5194     {
5195         glDisable(GL_ALPHA_TEST);
5196         checkGLcall("glDisable(GL_ALPHA_TEST)");
5197     }
5198
5199     LEAVE_GL();
5200
5201     /* Leave the opengl state valid for blitting */
5202     device->blitter->unset_shader(context->gl_info);
5203
5204     if (wined3d_settings.strict_draw_ordering
5205             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5206             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5207         wglFlush(); /* Flush to ensure ordering across contexts. */
5208
5209     context_release(context);
5210 }
5211
5212 /* Do not call while under the GL lock. */
5213 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5214 {
5215     struct wined3d_device *device = s->resource.device;
5216     const struct blit_shader *blitter;
5217
5218     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5219             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5220     if (!blitter)
5221     {
5222         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5223         return WINED3DERR_INVALIDCALL;
5224     }
5225
5226     return blitter->color_fill(device, s, rect, color);
5227 }
5228
5229 /* Do not call while under the GL lock. */
5230 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5231         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5232         WINED3DTEXTUREFILTERTYPE Filter)
5233 {
5234     struct wined3d_device *device = dst_surface->resource.device;
5235     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5236     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5237
5238     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5239             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5240             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5241
5242     /* Get the swapchain. One of the surfaces has to be a primary surface */
5243     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5244     {
5245         WARN("Destination is in sysmem, rejecting gl blt\n");
5246         return WINED3DERR_INVALIDCALL;
5247     }
5248
5249     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5250         dstSwapchain = dst_surface->container.u.swapchain;
5251
5252     if (src_surface)
5253     {
5254         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5255         {
5256             WARN("Src is in sysmem, rejecting gl blt\n");
5257             return WINED3DERR_INVALIDCALL;
5258         }
5259
5260         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5261             srcSwapchain = src_surface->container.u.swapchain;
5262     }
5263
5264     /* Early sort out of cases where no render target is used */
5265     if (!dstSwapchain && !srcSwapchain
5266             && src_surface != device->fb.render_targets[0]
5267             && dst_surface != device->fb.render_targets[0])
5268     {
5269         TRACE("No surface is render target, not using hardware blit.\n");
5270         return WINED3DERR_INVALIDCALL;
5271     }
5272
5273     /* No destination color keying supported */
5274     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5275     {
5276         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5277         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5278         return WINED3DERR_INVALIDCALL;
5279     }
5280
5281     /* The only case where both surfaces on a swapchain are supported is a back buffer -> front buffer blit on the same swapchain */
5282     if (dstSwapchain && dstSwapchain == srcSwapchain && dstSwapchain->back_buffers
5283             && dst_surface == dstSwapchain->front_buffer
5284             && src_surface == dstSwapchain->back_buffers[0])
5285     {
5286         /* Half-Life does a Blt from the back buffer to the front buffer,
5287          * Full surface size, no flags... Use present instead
5288          *
5289          * This path will only be entered for d3d7 and ddraw apps, because d3d8/9 offer no way to blit TO the front buffer
5290          */
5291
5292         /* Check rects - wined3d_swapchain_present() doesn't handle them. */
5293         for (;;)
5294         {
5295             TRACE("Looking if a Present can be done...\n");
5296             /* Source Rectangle must be full surface */
5297             if (src_rect->left || src_rect->top
5298                     || src_rect->right != src_surface->resource.width
5299                     || src_rect->bottom != src_surface->resource.height)
5300             {
5301                 TRACE("No, Source rectangle doesn't match\n");
5302                 break;
5303             }
5304
5305             /* No stretching may occur */
5306             if (src_rect->right != dst_rect->right - dst_rect->left
5307                     || src_rect->bottom != dst_rect->bottom - dst_rect->top)
5308             {
5309                 TRACE("No, stretching is done\n");
5310                 break;
5311             }
5312
5313             /* Destination must be full surface or match the clipping rectangle */
5314             if (dst_surface->clipper && dst_surface->clipper->hWnd)
5315             {
5316                 RECT cliprect;
5317                 POINT pos[2];
5318                 GetClientRect(dst_surface->clipper->hWnd, &cliprect);
5319                 pos[0].x = dst_rect->left;
5320                 pos[0].y = dst_rect->top;
5321                 pos[1].x = dst_rect->right;
5322                 pos[1].y = dst_rect->bottom;
5323                 MapWindowPoints(GetDesktopWindow(), dst_surface->clipper->hWnd, pos, 2);
5324
5325                 if (pos[0].x != cliprect.left || pos[0].y != cliprect.top
5326                         || pos[1].x != cliprect.right || pos[1].y != cliprect.bottom)
5327                 {
5328                     TRACE("No, dest rectangle doesn't match(clipper)\n");
5329                     TRACE("Clip rect at %s\n", wine_dbgstr_rect(&cliprect));
5330                     TRACE("Blt dest: %s\n", wine_dbgstr_rect(dst_rect));
5331                     break;
5332                 }
5333             }
5334             else if (dst_rect->left || dst_rect->top
5335                     || dst_rect->right != dst_surface->resource.width
5336                     || dst_rect->bottom != dst_surface->resource.height)
5337             {
5338                 TRACE("No, dest rectangle doesn't match(surface size)\n");
5339                 break;
5340             }
5341
5342             TRACE("Yes\n");
5343
5344             /* These flags are unimportant for the flag check, remove them */
5345             if (!(flags & ~(WINEDDBLT_DONOTWAIT | WINEDDBLT_WAIT)))
5346             {
5347                 WINED3DSWAPEFFECT orig_swap = dstSwapchain->presentParms.SwapEffect;
5348
5349                 /* The idea behind this is that a glReadPixels and a glDrawPixels call
5350                     * take very long, while a flip is fast.
5351                     * This applies to Half-Life, which does such Blts every time it finished
5352                     * a frame, and to Prince of Persia 3D, which uses this to draw at least the main
5353                     * menu. This is also used by all apps when they do windowed rendering
5354                     *
5355                     * The problem is that flipping is not really the same as copying. After a
5356                     * Blt the front buffer is a copy of the back buffer, and the back buffer is
5357                     * untouched. Therefore it's necessary to override the swap effect
5358                     * and to set it back after the flip.
5359                     *
5360                     * Windowed Direct3D < 7 apps do the same. The D3D7 sdk demos are nice
5361                     * testcases.
5362                     */
5363
5364                 dstSwapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
5365                 dstSwapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
5366
5367                 TRACE("Full screen back buffer -> front buffer blt, performing a flip instead.\n");
5368                 wined3d_swapchain_present(dstSwapchain, NULL, NULL, dstSwapchain->win_handle, NULL, 0);
5369
5370                 dstSwapchain->presentParms.SwapEffect = orig_swap;
5371
5372                 return WINED3D_OK;
5373             }
5374             break;
5375         }
5376
5377         TRACE("Unsupported blit between buffers on the same swapchain\n");
5378         return WINED3DERR_INVALIDCALL;
5379     } else if(dstSwapchain && dstSwapchain == srcSwapchain) {
5380         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5381         return WINED3DERR_INVALIDCALL;
5382     } else if(dstSwapchain && srcSwapchain) {
5383         FIXME("Implement hardware blit between two different swapchains\n");
5384         return WINED3DERR_INVALIDCALL;
5385     }
5386     else if (dstSwapchain)
5387     {
5388         /* Handled with regular texture -> swapchain blit */
5389         if (src_surface == device->fb.render_targets[0])
5390             TRACE("Blit from active render target to a swapchain\n");
5391     }
5392     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5393     {
5394         FIXME("Implement blit from a swapchain to the active render target\n");
5395         return WINED3DERR_INVALIDCALL;
5396     }
5397
5398     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5399     {
5400         /* Blit from render target to texture */
5401         BOOL stretchx;
5402
5403         /* P8 read back is not implemented */
5404         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5405                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5406         {
5407             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5408             return WINED3DERR_INVALIDCALL;
5409         }
5410
5411         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5412         {
5413             TRACE("Color keying not supported by frame buffer to texture blit\n");
5414             return WINED3DERR_INVALIDCALL;
5415             /* Destination color key is checked above */
5416         }
5417
5418         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5419             stretchx = TRUE;
5420         else
5421             stretchx = FALSE;
5422
5423         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5424          * flip the image nor scale it.
5425          *
5426          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5427          * -> If the app wants a image width an unscaled width, copy it line per line
5428          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5429          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5430          *    back buffer. This is slower than reading line per line, thus not used for flipping
5431          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5432          *    pixel by pixel
5433          *
5434          * If EXT_framebuffer_blit is supported that can be used instead. Note that EXT_framebuffer_blit implies
5435          * FBO support, so it doesn't really make sense to try and make it work with different offscreen rendering
5436          * backends. */
5437         if (fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5438                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5439                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5440         {
5441             surface_blt_fbo(device, Filter,
5442                     src_surface, SFLAG_INDRAWABLE, src_rect,
5443                     dst_surface, SFLAG_INDRAWABLE, dst_rect);
5444             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5445         }
5446         else if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5447                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5448         {
5449             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5450             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5451         } else {
5452             TRACE("Using hardware stretching to flip / stretch the texture\n");
5453             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5454         }
5455
5456         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5457         {
5458             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5459             dst_surface->resource.allocatedMemory = NULL;
5460             dst_surface->resource.heapMemory = NULL;
5461         }
5462         else
5463         {
5464             dst_surface->flags &= ~SFLAG_INSYSMEM;
5465         }
5466
5467         return WINED3D_OK;
5468     }
5469     else if (src_surface)
5470     {
5471         /* Blit from offscreen surface to render target */
5472         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5473         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5474
5475         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5476
5477         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5478                 && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5479                         src_rect, src_surface->resource.usage, src_surface->resource.pool,
5480                         src_surface->resource.format,
5481                         dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5482                         dst_surface->resource.format))
5483         {
5484             TRACE("Using surface_blt_fbo.\n");
5485             /* The source is always a texture, but never the currently active render target, and the texture
5486              * contents are never upside down. */
5487             surface_blt_fbo(device, Filter,
5488                     src_surface, SFLAG_INDRAWABLE, src_rect,
5489                     dst_surface, SFLAG_INDRAWABLE, dst_rect);
5490             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5491             return WINED3D_OK;
5492         }
5493
5494         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5495                 && arbfp_blit.blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5496                         src_rect, src_surface->resource.usage, src_surface->resource.pool,
5497                         src_surface->resource.format,
5498                         dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5499                         dst_surface->resource.format))
5500         {
5501             return arbfp_blit_surface(device, src_surface, src_rect, dst_surface, dst_rect,
5502                     WINED3D_BLIT_OP_COLOR_BLIT, Filter);
5503         }
5504
5505         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5506                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5507                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5508         {
5509             FIXME("Unsupported blit operation falling back to software\n");
5510             return WINED3DERR_INVALIDCALL;
5511         }
5512
5513         /* Color keying: Check if we have to do a color keyed blt,
5514          * and if not check if a color key is activated.
5515          *
5516          * Just modify the color keying parameters in the surface and restore them afterwards
5517          * The surface keeps track of the color key last used to load the opengl surface.
5518          * PreLoad will catch the change to the flags and color key and reload if necessary.
5519          */
5520         if (flags & WINEDDBLT_KEYSRC)
5521         {
5522             /* Use color key from surface */
5523         }
5524         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5525         {
5526             /* Use color key from DDBltFx */
5527             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5528             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5529         }
5530         else
5531         {
5532             /* Do not use color key */
5533             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5534         }
5535
5536         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5537                 src_surface, src_rect, dst_surface, dst_rect);
5538
5539         /* Restore the color key parameters */
5540         src_surface->CKeyFlags = oldCKeyFlags;
5541         src_surface->SrcBltCKey = oldBltCKey;
5542
5543         surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5544
5545         return WINED3D_OK;
5546     }
5547     else
5548     {
5549         /* Source-Less Blit to render target */
5550         if (flags & WINEDDBLT_COLORFILL)
5551         {
5552             WINED3DCOLORVALUE color;
5553
5554             TRACE("Colorfill\n");
5555
5556             /* The color as given in the Blt function is in the surface format. */
5557             if (!surface_convert_color_to_float(dst_surface, DDBltFx->u5.dwFillColor, &color))
5558                 return WINED3DERR_INVALIDCALL;
5559
5560             return surface_color_fill(dst_surface, dst_rect, &color);
5561         }
5562     }
5563
5564     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5565     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5566     return WINED3DERR_INVALIDCALL;
5567 }
5568
5569 /* GL locking is done by the caller */
5570 static void surface_depth_blt(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
5571         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5572 {
5573     struct wined3d_device *device = surface->resource.device;
5574     GLint compare_mode = GL_NONE;
5575     struct blt_info info;
5576     GLint old_binding = 0;
5577     RECT rect;
5578
5579     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5580
5581     glDisable(GL_CULL_FACE);
5582     glDisable(GL_BLEND);
5583     glDisable(GL_ALPHA_TEST);
5584     glDisable(GL_SCISSOR_TEST);
5585     glDisable(GL_STENCIL_TEST);
5586     glEnable(GL_DEPTH_TEST);
5587     glDepthFunc(GL_ALWAYS);
5588     glDepthMask(GL_TRUE);
5589     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5590     glViewport(x, y, w, h);
5591
5592     SetRect(&rect, 0, h, w, 0);
5593     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5594     GL_EXTCALL(glActiveTextureARB(GL_TEXTURE0_ARB));
5595     glGetIntegerv(info.binding, &old_binding);
5596     glBindTexture(info.bind_target, texture);
5597     if (gl_info->supported[ARB_SHADOW])
5598     {
5599         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5600         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5601     }
5602
5603     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5604             gl_info, info.tex_type, &surface->ds_current_size);
5605
5606     glBegin(GL_TRIANGLE_STRIP);
5607     glTexCoord3fv(info.coords[0]);
5608     glVertex2f(-1.0f, -1.0f);
5609     glTexCoord3fv(info.coords[1]);
5610     glVertex2f(1.0f, -1.0f);
5611     glTexCoord3fv(info.coords[2]);
5612     glVertex2f(-1.0f, 1.0f);
5613     glTexCoord3fv(info.coords[3]);
5614     glVertex2f(1.0f, 1.0f);
5615     glEnd();
5616
5617     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5618     glBindTexture(info.bind_target, old_binding);
5619
5620     glPopAttrib();
5621
5622     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5623 }
5624
5625 void surface_modify_ds_location(struct wined3d_surface *surface,
5626         DWORD location, UINT w, UINT h)
5627 {
5628     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5629
5630     if (location & ~SFLAG_DS_LOCATIONS)
5631         FIXME("Invalid location (%#x) specified.\n", location);
5632
5633     surface->ds_current_size.cx = w;
5634     surface->ds_current_size.cy = h;
5635     surface->flags &= ~SFLAG_DS_LOCATIONS;
5636     surface->flags |= location;
5637 }
5638
5639 /* Context activation is done by the caller. */
5640 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5641 {
5642     struct wined3d_device *device = surface->resource.device;
5643     const struct wined3d_gl_info *gl_info = context->gl_info;
5644     GLsizei w, h;
5645
5646     TRACE("surface %p, new location %#x.\n", surface, location);
5647
5648     /* TODO: Make this work for modes other than FBO */
5649     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5650
5651     if (!(surface->flags & location))
5652     {
5653         w = surface->ds_current_size.cx;
5654         h = surface->ds_current_size.cy;
5655         surface->ds_current_size.cx = 0;
5656         surface->ds_current_size.cy = 0;
5657     }
5658     else
5659     {
5660         w = surface->resource.width;
5661         h = surface->resource.height;
5662     }
5663
5664     if (surface->ds_current_size.cx == surface->resource.width
5665             && surface->ds_current_size.cy == surface->resource.height)
5666     {
5667         TRACE("Location (%#x) is already up to date.\n", location);
5668         return;
5669     }
5670
5671     if (surface->current_renderbuffer)
5672     {
5673         FIXME("Not supported with fixed up depth stencil.\n");
5674         return;
5675     }
5676
5677     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5678     {
5679         /* This mostly happens when a depth / stencil is used without being
5680          * cleared first. In principle we could upload from sysmem, or
5681          * explicitly clear before first usage. For the moment there don't
5682          * appear to be a lot of applications depending on this, so a FIXME
5683          * should do. */
5684         FIXME("No up to date depth stencil location.\n");
5685         surface->flags |= location;
5686         surface->ds_current_size.cx = surface->resource.width;
5687         surface->ds_current_size.cy = surface->resource.height;
5688         return;
5689     }
5690
5691     if (location == SFLAG_DS_OFFSCREEN)
5692     {
5693         GLint old_binding = 0;
5694         GLenum bind_target;
5695
5696         /* The render target is allowed to be smaller than the depth/stencil
5697          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5698          * than the offscreen surface. Don't overwrite the offscreen surface
5699          * with undefined data. */
5700         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5701         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5702
5703         TRACE("Copying onscreen depth buffer to depth texture.\n");
5704
5705         ENTER_GL();
5706
5707         if (!device->depth_blt_texture)
5708         {
5709             glGenTextures(1, &device->depth_blt_texture);
5710         }
5711
5712         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5713          * directly on the FBO texture. That's because we need to flip. */
5714         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5715                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5716         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5717         {
5718             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5719             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5720         }
5721         else
5722         {
5723             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5724             bind_target = GL_TEXTURE_2D;
5725         }
5726         glBindTexture(bind_target, device->depth_blt_texture);
5727         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5728          * internal format, because the internal format might include stencil
5729          * data. In principle we should copy stencil data as well, but unless
5730          * the driver supports stencil export it's hard to do, and doesn't
5731          * seem to be needed in practice. If the hardware doesn't support
5732          * writing stencil data, the glCopyTexImage2D() call might trigger
5733          * software fallbacks. */
5734         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5735         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5736         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5737         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5738         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5739         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5740         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5741         glBindTexture(bind_target, old_binding);
5742
5743         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5744                 NULL, surface, SFLAG_INTEXTURE);
5745         context_set_draw_buffer(context, GL_NONE);
5746
5747         /* Do the actual blit */
5748         surface_depth_blt(surface, gl_info, device->depth_blt_texture, 0, 0, w, h, bind_target);
5749         checkGLcall("depth_blt");
5750
5751         context_invalidate_state(context, STATE_FRAMEBUFFER);
5752
5753         LEAVE_GL();
5754
5755         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5756     }
5757     else if (location == SFLAG_DS_ONSCREEN)
5758     {
5759         TRACE("Copying depth texture to onscreen depth buffer.\n");
5760
5761         ENTER_GL();
5762
5763         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5764                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5765         surface_depth_blt(surface, gl_info, surface->texture_name,
5766                 0, surface->pow2Height - h, w, h, surface->texture_target);
5767         checkGLcall("depth_blt");
5768
5769         context_invalidate_state(context, STATE_FRAMEBUFFER);
5770
5771         LEAVE_GL();
5772
5773         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5774     }
5775     else
5776     {
5777         ERR("Invalid location (%#x) specified.\n", location);
5778     }
5779
5780     surface->flags |= location;
5781     surface->ds_current_size.cx = surface->resource.width;
5782     surface->ds_current_size.cy = surface->resource.height;
5783 }
5784
5785 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5786 {
5787     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5788     struct wined3d_surface *overlay;
5789
5790     TRACE("surface %p, location %s, persistent %#x.\n",
5791             surface, debug_surflocation(location), persistent);
5792
5793     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
5794     {
5795         if (surface_is_offscreen(surface))
5796         {
5797             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same
5798              * for offscreen targets. */
5799             if (location & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE))
5800                 location |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
5801         }
5802         else
5803         {
5804             TRACE("Surface %p is an onscreen surface.\n", surface);
5805         }
5806     }
5807
5808     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5809             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5810         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5811
5812     if (persistent)
5813     {
5814         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5815                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5816         {
5817             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5818             {
5819                 TRACE("Passing to container.\n");
5820                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5821             }
5822         }
5823         surface->flags &= ~SFLAG_LOCATIONS;
5824         surface->flags |= location;
5825
5826         /* Redraw emulated overlays, if any */
5827         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5828         {
5829             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5830             {
5831                 overlay->surface_ops->surface_draw_overlay(overlay);
5832             }
5833         }
5834     }
5835     else
5836     {
5837         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5838         {
5839             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5840             {
5841                 TRACE("Passing to container\n");
5842                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5843             }
5844         }
5845         surface->flags &= ~location;
5846     }
5847
5848     if (!(surface->flags & SFLAG_LOCATIONS))
5849     {
5850         ERR("Surface %p does not have any up to date location.\n", surface);
5851     }
5852 }
5853
5854 static DWORD resource_access_from_location(DWORD location)
5855 {
5856     switch (location)
5857     {
5858         case SFLAG_INSYSMEM:
5859             return WINED3D_RESOURCE_ACCESS_CPU;
5860
5861         case SFLAG_INDRAWABLE:
5862         case SFLAG_INSRGBTEX:
5863         case SFLAG_INTEXTURE:
5864             return WINED3D_RESOURCE_ACCESS_GPU;
5865
5866         default:
5867             FIXME("Unhandled location %#x.\n", location);
5868             return 0;
5869     }
5870 }
5871
5872 static void surface_load_sysmem(struct wined3d_surface *surface,
5873         const struct wined3d_gl_info *gl_info, const RECT *rect)
5874 {
5875     surface_prepare_system_memory(surface);
5876
5877     /* Download the surface to system memory. */
5878     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5879     {
5880         struct wined3d_device *device = surface->resource.device;
5881         struct wined3d_context *context = NULL;
5882
5883         if (!device->isInDraw)
5884             context = context_acquire(device, NULL);
5885
5886         surface_bind_and_dirtify(surface, gl_info, !(surface->flags & SFLAG_INTEXTURE));
5887         surface_download_data(surface, gl_info);
5888
5889         if (context)
5890             context_release(context);
5891
5892         return;
5893     }
5894
5895     /* Note: It might be faster to download into a texture first. */
5896     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5897             wined3d_surface_get_pitch(surface));
5898 }
5899
5900 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5901         const struct wined3d_gl_info *gl_info, const RECT *rect)
5902 {
5903     struct wined3d_device *device = surface->resource.device;
5904     struct wined3d_format format;
5905     CONVERT_TYPES convert;
5906     UINT byte_count;
5907     BYTE *mem;
5908
5909     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5910         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5911
5912     if (surface->flags & SFLAG_INTEXTURE)
5913     {
5914         RECT r;
5915
5916         surface_get_rect(surface, rect, &r);
5917         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5918
5919         return WINED3D_OK;
5920     }
5921
5922     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5923     {
5924         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5925          * path through sysmem. */
5926         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5927     }
5928
5929     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5930
5931     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5932      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5933      * called. */
5934     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5935     {
5936         struct wined3d_context *context = NULL;
5937
5938         TRACE("Removing the pbo attached to surface %p.\n", surface);
5939
5940         if (!device->isInDraw)
5941             context = context_acquire(device, NULL);
5942
5943         surface_remove_pbo(surface, gl_info);
5944
5945         if (context)
5946             context_release(context);
5947     }
5948
5949     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5950     {
5951         UINT height = surface->resource.height;
5952         UINT width = surface->resource.width;
5953         UINT src_pitch, dst_pitch;
5954
5955         byte_count = format.conv_byte_count;
5956         src_pitch = wined3d_surface_get_pitch(surface);
5957
5958         /* Stick to the alignment for the converted surface too, makes it
5959          * easier to load the surface. */
5960         dst_pitch = width * byte_count;
5961         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5962
5963         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5964         {
5965             ERR("Out of memory (%u).\n", dst_pitch * height);
5966             return E_OUTOFMEMORY;
5967         }
5968
5969         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5970                 src_pitch, width, height, dst_pitch, convert, surface);
5971
5972         surface->flags |= SFLAG_CONVERTED;
5973     }
5974     else
5975     {
5976         surface->flags &= ~SFLAG_CONVERTED;
5977         mem = surface->resource.allocatedMemory;
5978         byte_count = format.byte_count;
5979     }
5980
5981     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5982
5983     /* Don't delete PBO memory. */
5984     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5985         HeapFree(GetProcessHeap(), 0, mem);
5986
5987     return WINED3D_OK;
5988 }
5989
5990 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5991         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5992 {
5993     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
5994     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5995     struct wined3d_device *device = surface->resource.device;
5996     struct wined3d_context *context = NULL;
5997     UINT width, src_pitch, dst_pitch;
5998     struct wined3d_bo_address data;
5999     struct wined3d_format format;
6000     POINT dst_point = {0, 0};
6001     CONVERT_TYPES convert;
6002     BYTE *mem;
6003
6004     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6005             && surface_is_offscreen(surface)
6006             && (surface->flags & SFLAG_INDRAWABLE))
6007     {
6008         read_from_framebuffer_texture(surface, srgb);
6009
6010         return WINED3D_OK;
6011     }
6012
6013     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6014             && (surface->resource.format->flags & attach_flags) == attach_flags
6015             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6016                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6017                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6018     {
6019         if (srgb)
6020             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
6021                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6022         else
6023             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
6024                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6025
6026         return WINED3D_OK;
6027     }
6028
6029     /* Upload from system memory */
6030
6031     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6032             TRUE /* We will use textures */, &format, &convert);
6033
6034     if (srgb)
6035     {
6036         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6037         {
6038             /* Performance warning... */
6039             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6040             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6041         }
6042     }
6043     else
6044     {
6045         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6046         {
6047             /* Performance warning... */
6048             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6049             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6050         }
6051     }
6052
6053     if (!(surface->flags & SFLAG_INSYSMEM))
6054     {
6055         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6056         /* Lets hope we get it from somewhere... */
6057         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6058     }
6059
6060     if (!device->isInDraw)
6061         context = context_acquire(device, NULL);
6062
6063     surface_prepare_texture(surface, gl_info, srgb);
6064     surface_bind_and_dirtify(surface, gl_info, srgb);
6065
6066     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6067     {
6068         surface->flags |= SFLAG_GLCKEY;
6069         surface->glCKey = surface->SrcBltCKey;
6070     }
6071     else surface->flags &= ~SFLAG_GLCKEY;
6072
6073     width = surface->resource.width;
6074     src_pitch = wined3d_surface_get_pitch(surface);
6075
6076     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6077      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6078      * called. */
6079     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6080     {
6081         TRACE("Removing the pbo attached to surface %p.\n", surface);
6082         surface_remove_pbo(surface, gl_info);
6083     }
6084
6085     if (format.convert)
6086     {
6087         /* This code is entered for texture formats which need a fixup. */
6088         UINT height = surface->resource.height;
6089
6090         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6091         dst_pitch = width * format.conv_byte_count;
6092         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6093
6094         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6095         {
6096             ERR("Out of memory (%u).\n", dst_pitch * height);
6097             if (context)
6098                 context_release(context);
6099             return E_OUTOFMEMORY;
6100         }
6101         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6102     }
6103     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6104     {
6105         /* This code is only entered for color keying fixups */
6106         UINT height = surface->resource.height;
6107
6108         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6109         dst_pitch = width * format.conv_byte_count;
6110         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6111
6112         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6113         {
6114             ERR("Out of memory (%u).\n", dst_pitch * height);
6115             if (context)
6116                 context_release(context);
6117             return E_OUTOFMEMORY;
6118         }
6119         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6120                 width, height, dst_pitch, convert, surface);
6121     }
6122     else
6123     {
6124         mem = surface->resource.allocatedMemory;
6125     }
6126
6127     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6128     data.addr = mem;
6129     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6130
6131     if (context)
6132         context_release(context);
6133
6134     /* Don't delete PBO memory. */
6135     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6136         HeapFree(GetProcessHeap(), 0, mem);
6137
6138     return WINED3D_OK;
6139 }
6140
6141 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6142 {
6143     struct wined3d_device *device = surface->resource.device;
6144     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6145     BOOL in_fbo = FALSE;
6146     HRESULT hr;
6147
6148     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6149
6150     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6151     {
6152         if (location == SFLAG_INTEXTURE)
6153         {
6154             struct wined3d_context *context = context_acquire(device, NULL);
6155             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6156             context_release(context);
6157             return WINED3D_OK;
6158         }
6159         else
6160         {
6161             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6162             return WINED3DERR_INVALIDCALL;
6163         }
6164     }
6165
6166     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6167     {
6168         if (surface_is_offscreen(surface))
6169         {
6170             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same
6171              * for offscreen targets. Prefer SFLAG_INTEXTURE. */
6172             if (location == SFLAG_INDRAWABLE)
6173                 location = SFLAG_INTEXTURE;
6174             in_fbo = TRUE;
6175         }
6176         else
6177         {
6178             TRACE("Surface %p is an onscreen surface.\n", surface);
6179         }
6180     }
6181
6182     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6183         location = SFLAG_INTEXTURE;
6184
6185     if (surface->flags & location)
6186     {
6187         TRACE("Location already up to date.\n");
6188         return WINED3D_OK;
6189     }
6190
6191     if (WARN_ON(d3d_surface))
6192     {
6193         DWORD required_access = resource_access_from_location(location);
6194         if ((surface->resource.access_flags & required_access) != required_access)
6195             WARN("Operation requires %#x access, but surface only has %#x.\n",
6196                     required_access, surface->resource.access_flags);
6197     }
6198
6199     if (!(surface->flags & SFLAG_LOCATIONS))
6200     {
6201         ERR("Surface %p does not have any up to date location.\n", surface);
6202         surface->flags |= SFLAG_LOST;
6203         return WINED3DERR_DEVICELOST;
6204     }
6205
6206     switch (location)
6207     {
6208         case SFLAG_INSYSMEM:
6209             surface_load_sysmem(surface, gl_info, rect);
6210             break;
6211
6212         case SFLAG_INDRAWABLE:
6213             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6214                 return hr;
6215             break;
6216
6217         case SFLAG_INTEXTURE:
6218         case SFLAG_INSRGBTEX:
6219             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6220                 return hr;
6221             break;
6222
6223         default:
6224             ERR("Don't know how to handle location %#x.\n", location);
6225             break;
6226     }
6227
6228     if (!rect)
6229     {
6230         surface->flags |= location;
6231
6232         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6233             surface_evict_sysmem(surface);
6234     }
6235
6236     if (in_fbo && (surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)))
6237     {
6238         /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
6239         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
6240     }
6241
6242     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6243             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6244     {
6245         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6246     }
6247
6248     return WINED3D_OK;
6249 }
6250
6251 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6252 {
6253     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6254
6255     /* Not on a swapchain - must be offscreen */
6256     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6257
6258     /* The front buffer is always onscreen */
6259     if (surface == swapchain->front_buffer) return FALSE;
6260
6261     /* If the swapchain is rendered to an FBO, the backbuffer is
6262      * offscreen, otherwise onscreen */
6263     return swapchain->render_to_fbo;
6264 }
6265
6266 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Blitter free callback. This implementation does nothing.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6269
6270 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6271 /* Context activation is done by the caller. */
6272 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6273 {
6274     BYTE table[256][4];
6275     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6276
6277     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6278
6279     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6280     ENTER_GL();
6281     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6282     LEAVE_GL();
6283 }
6284
6285 /* Context activation is done by the caller. */
6286 static HRESULT ffp_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6287 {
6288     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6289
6290     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6291      * else the surface is converted in software at upload time in LoadLocation.
6292      */
6293     if(fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6294         ffp_blit_p8_upload_palette(surface, gl_info);
6295
6296     ENTER_GL();
6297     glEnable(surface->texture_target);
6298     checkGLcall("glEnable(surface->texture_target)");
6299     LEAVE_GL();
6300     return WINED3D_OK;
6301 }
6302
6303 /* Context activation is done by the caller. */
6304 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6305 {
6306     ENTER_GL();
6307     glDisable(GL_TEXTURE_2D);
6308     checkGLcall("glDisable(GL_TEXTURE_2D)");
6309     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6310     {
6311         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6312         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6313     }
6314     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6315     {
6316         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6317         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6318     }
6319     LEAVE_GL();
6320 }
6321
6322 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6323         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6324         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6325 {
6326     enum complex_fixup src_fixup;
6327
6328     switch (blit_op)
6329     {
6330         case WINED3D_BLIT_OP_COLOR_BLIT:
6331             src_fixup = get_complex_fixup(src_format->color_fixup);
6332             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6333             {
6334                 TRACE("Checking support for fixup:\n");
6335                 dump_color_fixup_desc(src_format->color_fixup);
6336             }
6337
6338             if (!is_identity_fixup(dst_format->color_fixup))
6339             {
6340                 TRACE("Destination fixups are not supported\n");
6341                 return FALSE;
6342             }
6343
6344             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6345             {
6346                 TRACE("P8 fixup supported\n");
6347                 return TRUE;
6348             }
6349
6350             /* We only support identity conversions. */
6351             if (is_identity_fixup(src_format->color_fixup))
6352             {
6353                 TRACE("[OK]\n");
6354                 return TRUE;
6355             }
6356
6357             TRACE("[FAILED]\n");
6358             return FALSE;
6359
6360         case WINED3D_BLIT_OP_COLOR_FILL:
6361             if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6362             {
6363                 TRACE("Color fill not supported\n");
6364                 return FALSE;
6365             }
6366
6367             return TRUE;
6368
6369         case WINED3D_BLIT_OP_DEPTH_FILL:
6370             return TRUE;
6371
6372         default:
6373             TRACE("Unsupported blit_op=%d\n", blit_op);
6374             return FALSE;
6375     }
6376 }
6377
6378 /* Do not call while under the GL lock. */
6379 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6380         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6381 {
6382     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6383     struct wined3d_fb_state fb = {&dst_surface, NULL};
6384
6385     return device_clear_render_targets(device, 1, &fb,
6386             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6387 }
6388
6389 /* Do not call while under the GL lock. */
6390 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6391         struct wined3d_surface *surface, const RECT *rect, float depth)
6392 {
6393     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6394     struct wined3d_fb_state fb = {NULL, surface};
6395
6396     return device_clear_render_targets(device, 0, &fb,
6397             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6398 }
6399
/* Function table of the fixed-function blitter. The initializers are
 * positional, so their order has to match the member order of
 * struct blit_shader (declared outside this file section). */
const struct blit_shader ffp_blit =  {
    ffp_blit_alloc,
    ffp_blit_free,
    ffp_blit_set,
    ffp_blit_unset,
    ffp_blit_supported,
    ffp_blit_color_fill,
    ffp_blit_depth_fill,
};
6409
6410 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6411 {
6412     return WINED3D_OK;
6413 }
6414
/* Blitter "free" hook for the CPU blitter; intentionally a no-op.
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Nothing to release. */
}
6419
6420 /* Context activation is done by the caller. */
6421 static HRESULT cpu_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6422 {
6423     return WINED3D_OK;
6424 }
6425
/* Blitter "unset" hook for the CPU blitter; intentionally a no-op.
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* No state was changed by cpu_blit_set(), so nothing to restore. */
}
6430
6431 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6432         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6433         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6434 {
6435     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6436     {
6437         return TRUE;
6438     }
6439
6440     return FALSE;
6441 }
6442
6443 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6444         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6445         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6446 {
6447     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6448     const struct wined3d_format *src_format, *dst_format;
6449     struct wined3d_surface *orig_src = src_surface;
6450     WINED3DLOCKED_RECT dlock, slock;
6451     HRESULT hr = WINED3D_OK;
6452     const BYTE *sbuf;
6453     RECT xdst,xsrc;
6454     BYTE *dbuf;
6455     int x, y;
6456
6457     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6458             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6459             flags, fx, debug_d3dtexturefiltertype(filter));
6460
6461     /* First check for the validity of source / destination rectangles.
6462      * This was verified using a test application and by MSDN. */
6463     if (src_rect)
6464     {
6465         if (src_surface)
6466         {
6467             if (src_rect->right < src_rect->left || src_rect->bottom < src_rect->top
6468                     || src_rect->left > src_surface->resource.width || src_rect->left < 0
6469                     || src_rect->top > src_surface->resource.height || src_rect->top < 0
6470                     || src_rect->right > src_surface->resource.width || src_rect->right < 0
6471                     || src_rect->bottom > src_surface->resource.height || src_rect->bottom < 0)
6472             {
6473                 WARN("Application gave us bad source rectangle for Blt.\n");
6474                 return WINEDDERR_INVALIDRECT;
6475             }
6476
6477             if (!src_rect->right || !src_rect->bottom
6478                     || src_rect->left == (int)src_surface->resource.width
6479                     || src_rect->top == (int)src_surface->resource.height)
6480             {
6481                 TRACE("Nothing to be done.\n");
6482                 return WINED3D_OK;
6483             }
6484         }
6485
6486         xsrc = *src_rect;
6487     }
6488     else if (src_surface)
6489     {
6490         xsrc.left = 0;
6491         xsrc.top = 0;
6492         xsrc.right = src_surface->resource.width;
6493         xsrc.bottom = src_surface->resource.height;
6494     }
6495     else
6496     {
6497         memset(&xsrc, 0, sizeof(xsrc));
6498     }
6499
6500     if (dst_rect)
6501     {
6502         /* For the Destination rect, it can be out of bounds on the condition
6503          * that a clipper is set for the given surface. */
6504         if (!dst_surface->clipper && (dst_rect->right < dst_rect->left || dst_rect->bottom < dst_rect->top
6505                 || dst_rect->left > dst_surface->resource.width || dst_rect->left < 0
6506                 || dst_rect->top > dst_surface->resource.height || dst_rect->top < 0
6507                 || dst_rect->right > dst_surface->resource.width || dst_rect->right < 0
6508                 || dst_rect->bottom > dst_surface->resource.height || dst_rect->bottom < 0))
6509         {
6510             WARN("Application gave us bad destination rectangle for Blt without a clipper set.\n");
6511             return WINEDDERR_INVALIDRECT;
6512         }
6513
6514         if (dst_rect->right <= 0 || dst_rect->bottom <= 0
6515                 || dst_rect->left >= (int)dst_surface->resource.width
6516                 || dst_rect->top >= (int)dst_surface->resource.height)
6517         {
6518             TRACE("Nothing to be done.\n");
6519             return WINED3D_OK;
6520         }
6521
6522         if (!src_surface)
6523         {
6524             RECT full_rect;
6525
6526             full_rect.left = 0;
6527             full_rect.top = 0;
6528             full_rect.right = dst_surface->resource.width;
6529             full_rect.bottom = dst_surface->resource.height;
6530             IntersectRect(&xdst, &full_rect, dst_rect);
6531         }
6532         else
6533         {
6534             BOOL clip_horiz, clip_vert;
6535
6536             xdst = *dst_rect;
6537             clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6538             clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6539
6540             if (clip_vert || clip_horiz)
6541             {
6542                 /* Now check if this is a special case or not... */
6543                 if ((flags & WINEDDBLT_DDFX)
6544                         || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6545                         || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6546                 {
6547                     WARN("Out of screen rectangle in special case. Not handled right now.\n");
6548                     return WINED3D_OK;
6549                 }
6550
6551                 if (clip_horiz)
6552                 {
6553                     if (xdst.left < 0)
6554                     {
6555                         xsrc.left -= xdst.left;
6556                         xdst.left = 0;
6557                     }
6558                     if (xdst.right > dst_surface->resource.width)
6559                     {
6560                         xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6561                         xdst.right = (int)dst_surface->resource.width;
6562                     }
6563                 }
6564
6565                 if (clip_vert)
6566                 {
6567                     if (xdst.top < 0)
6568                     {
6569                         xsrc.top -= xdst.top;
6570                         xdst.top = 0;
6571                     }
6572                     if (xdst.bottom > dst_surface->resource.height)
6573                     {
6574                         xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6575                         xdst.bottom = (int)dst_surface->resource.height;
6576                     }
6577                 }
6578
6579                 /* And check if after clipping something is still to be done... */
6580                 if ((xdst.right <= 0) || (xdst.bottom <= 0)
6581                         || (xdst.left >= (int)dst_surface->resource.width)
6582                         || (xdst.top >= (int)dst_surface->resource.height)
6583                         || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6584                         || (xsrc.left >= (int)src_surface->resource.width)
6585                         || (xsrc.top >= (int)src_surface->resource.height))
6586                 {
6587                     TRACE("Nothing to be done after clipping.\n");
6588                     return WINED3D_OK;
6589                 }
6590             }
6591         }
6592     }
6593     else
6594     {
6595         xdst.left = 0;
6596         xdst.top = 0;
6597         xdst.right = dst_surface->resource.width;
6598         xdst.bottom = dst_surface->resource.height;
6599     }
6600
6601     if (src_surface == dst_surface)
6602     {
6603         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6604         slock = dlock;
6605         src_format = dst_surface->resource.format;
6606         dst_format = src_format;
6607     }
6608     else
6609     {
6610         dst_format = dst_surface->resource.format;
6611         if (src_surface)
6612         {
6613             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6614             {
6615                 src_surface = surface_convert_format(src_surface, dst_format->id);
6616                 if (!src_surface)
6617                 {
6618                     /* The conv function writes a FIXME */
6619                     WARN("Cannot convert source surface format to dest format.\n");
6620                     goto release;
6621                 }
6622             }
6623             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6624             src_format = src_surface->resource.format;
6625         }
6626         else
6627         {
6628             src_format = dst_format;
6629         }
6630         if (dst_rect)
6631             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6632         else
6633             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6634     }
6635
6636     if (!fx || !(fx->dwDDFX)) flags &= ~WINEDDBLT_DDFX;
6637
6638     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_FOURCC)
6639     {
6640         if (!dst_rect || src_surface == dst_surface)
6641         {
6642             memcpy(dlock.pBits, slock.pBits, dst_surface->resource.size);
6643             goto release;
6644         }
6645     }
6646
6647     bpp = dst_surface->resource.format->byte_count;
6648     srcheight = xsrc.bottom - xsrc.top;
6649     srcwidth = xsrc.right - xsrc.left;
6650     dstheight = xdst.bottom - xdst.top;
6651     dstwidth = xdst.right - xdst.left;
6652     width = (xdst.right - xdst.left) * bpp;
6653
6654     if (dst_rect && src_surface != dst_surface)
6655         dbuf = dlock.pBits;
6656     else
6657         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6658
6659     if (flags & WINEDDBLT_WAIT)
6660     {
6661         flags &= ~WINEDDBLT_WAIT;
6662     }
6663     if (flags & WINEDDBLT_ASYNC)
6664     {
6665         static BOOL displayed = FALSE;
6666         if (!displayed)
6667             FIXME("Can't handle WINEDDBLT_ASYNC flag right now.\n");
6668         displayed = TRUE;
6669         flags &= ~WINEDDBLT_ASYNC;
6670     }
6671     if (flags & WINEDDBLT_DONOTWAIT)
6672     {
6673         /* WINEDDBLT_DONOTWAIT appeared in DX7 */
6674         static BOOL displayed = FALSE;
6675         if (!displayed)
6676             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag right now.\n");
6677         displayed = TRUE;
6678         flags &= ~WINEDDBLT_DONOTWAIT;
6679     }
6680
6681     /* First, all the 'source-less' blits */
6682     if (flags & WINEDDBLT_COLORFILL)
6683     {
6684         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6685         flags &= ~WINEDDBLT_COLORFILL;
6686     }
6687
6688     if (flags & WINEDDBLT_DEPTHFILL)
6689     {
6690         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6691     }
6692     if (flags & WINEDDBLT_ROP)
6693     {
6694         /* Catch some degenerate cases here. */
6695         switch (fx->dwROP)
6696         {
6697             case BLACKNESS:
6698                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6699                 break;
6700             case 0xAA0029: /* No-op */
6701                 break;
6702             case WHITENESS:
6703                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6704                 break;
6705             case SRCCOPY: /* Well, we do that below? */
6706                 break;
6707             default:
6708                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6709                 goto error;
6710         }
6711         flags &= ~WINEDDBLT_ROP;
6712     }
6713     if (flags & WINEDDBLT_DDROPS)
6714     {
6715         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6716     }
6717     /* Now the 'with source' blits. */
6718     if (src_surface)
6719     {
6720         const BYTE *sbase;
6721         int sx, xinc, sy, yinc;
6722
6723         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6724             goto release;
6725
6726         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6727                 && (srcwidth != dstwidth || srcheight != dstheight))
6728         {
6729             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6730             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6731         }
6732
6733         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6734         xinc = (srcwidth << 16) / dstwidth;
6735         yinc = (srcheight << 16) / dstheight;
6736
6737         if (!flags)
6738         {
6739             /* No effects, we can cheat here. */
6740             if (dstwidth == srcwidth)
6741             {
6742                 if (dstheight == srcheight)
6743                 {
6744                     /* No stretching in either direction. This needs to be as
6745                      * fast as possible. */
6746                     sbuf = sbase;
6747
6748                     /* Check for overlapping surfaces. */
6749                     if (src_surface != dst_surface || xdst.top < xsrc.top
6750                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6751                     {
6752                         /* No overlap, or dst above src, so copy from top downwards. */
6753                         for (y = 0; y < dstheight; ++y)
6754                         {
6755                             memcpy(dbuf, sbuf, width);
6756                             sbuf += slock.Pitch;
6757                             dbuf += dlock.Pitch;
6758                         }
6759                     }
6760                     else if (xdst.top > xsrc.top)
6761                     {
6762                         /* Copy from bottom upwards. */
6763                         sbuf += (slock.Pitch*dstheight);
6764                         dbuf += (dlock.Pitch*dstheight);
6765                         for (y = 0; y < dstheight; ++y)
6766                         {
6767                             sbuf -= slock.Pitch;
6768                             dbuf -= dlock.Pitch;
6769                             memcpy(dbuf, sbuf, width);
6770                         }
6771                     }
6772                     else
6773                     {
6774                         /* Src and dst overlapping on the same line, use memmove. */
6775                         for (y = 0; y < dstheight; ++y)
6776                         {
6777                             memmove(dbuf, sbuf, width);
6778                             sbuf += slock.Pitch;
6779                             dbuf += dlock.Pitch;
6780                         }
6781                     }
6782                 }
6783                 else
6784                 {
6785                     /* Stretching in y direction only. */
6786                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6787                     {
6788                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6789                         memcpy(dbuf, sbuf, width);
6790                         dbuf += dlock.Pitch;
6791                     }
6792                 }
6793             }
6794             else
6795             {
6796                 /* Stretching in X direction. */
6797                 int last_sy = -1;
6798                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6799                 {
6800                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6801
6802                     if ((sy >> 16) == (last_sy >> 16))
6803                     {
6804                         /* This source row is the same as last source row -
6805                          * Copy the already stretched row. */
6806                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6807                     }
6808                     else
6809                     {
6810 #define STRETCH_ROW(type) \
6811 do { \
6812     const type *s = (const type *)sbuf; \
6813     type *d = (type *)dbuf; \
6814     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6815         d[x] = s[sx >> 16]; \
6816 } while(0)
6817
6818                         switch(bpp)
6819                         {
6820                             case 1:
6821                                 STRETCH_ROW(BYTE);
6822                                 break;
6823                             case 2:
6824                                 STRETCH_ROW(WORD);
6825                                 break;
6826                             case 4:
6827                                 STRETCH_ROW(DWORD);
6828                                 break;
6829                             case 3:
6830                             {
6831                                 const BYTE *s;
6832                                 BYTE *d = dbuf;
6833                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6834                                 {
6835                                     DWORD pixel;
6836
6837                                     s = sbuf + 3 * (sx >> 16);
6838                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6839                                     d[0] = (pixel      ) & 0xff;
6840                                     d[1] = (pixel >>  8) & 0xff;
6841                                     d[2] = (pixel >> 16) & 0xff;
6842                                     d += 3;
6843                                 }
6844                                 break;
6845                             }
6846                             default:
6847                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6848                                 hr = WINED3DERR_NOTAVAILABLE;
6849                                 goto error;
6850                         }
6851 #undef STRETCH_ROW
6852                     }
6853                     dbuf += dlock.Pitch;
6854                     last_sy = sy;
6855                 }
6856             }
6857         }
6858         else
6859         {
6860             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6861             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6862             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6863             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6864             {
6865                 /* The color keying flags are checked for correctness in ddraw */
6866                 if (flags & WINEDDBLT_KEYSRC)
6867                 {
6868                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6869                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6870                 }
6871                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6872                 {
6873                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6874                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6875                 }
6876
6877                 if (flags & WINEDDBLT_KEYDEST)
6878                 {
6879                     /* Destination color keys are taken from the source surface! */
6880                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6881                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6882                 }
6883                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6884                 {
6885                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6886                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6887                 }
6888
6889                 if (bpp == 1)
6890                 {
6891                     keymask = 0xff;
6892                 }
6893                 else
6894                 {
6895                     keymask = src_format->red_mask
6896                             | src_format->green_mask
6897                             | src_format->blue_mask;
6898                 }
6899                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6900             }
6901
6902             if (flags & WINEDDBLT_DDFX)
6903             {
6904                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6905                 LONG tmpxy;
6906                 dTopLeft     = dbuf;
6907                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6908                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6909                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6910
6911                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6912                 {
6913                     /* I don't think we need to do anything about this flag */
6914                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6915                 }
6916                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6917                 {
6918                     tmp          = dTopRight;
6919                     dTopRight    = dTopLeft;
6920                     dTopLeft     = tmp;
6921                     tmp          = dBottomRight;
6922                     dBottomRight = dBottomLeft;
6923                     dBottomLeft  = tmp;
6924                     dstxinc = dstxinc * -1;
6925                 }
6926                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6927                 {
6928                     tmp          = dTopLeft;
6929                     dTopLeft     = dBottomLeft;
6930                     dBottomLeft  = tmp;
6931                     tmp          = dTopRight;
6932                     dTopRight    = dBottomRight;
6933                     dBottomRight = tmp;
6934                     dstyinc = dstyinc * -1;
6935                 }
6936                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6937                 {
6938                     /* I don't think we need to do anything about this flag */
6939                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6940                 }
6941                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6942                 {
6943                     tmp          = dBottomRight;
6944                     dBottomRight = dTopLeft;
6945                     dTopLeft     = tmp;
6946                     tmp          = dBottomLeft;
6947                     dBottomLeft  = dTopRight;
6948                     dTopRight    = tmp;
6949                     dstxinc = dstxinc * -1;
6950                     dstyinc = dstyinc * -1;
6951                 }
6952                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6953                 {
6954                     tmp          = dTopLeft;
6955                     dTopLeft     = dBottomLeft;
6956                     dBottomLeft  = dBottomRight;
6957                     dBottomRight = dTopRight;
6958                     dTopRight    = tmp;
6959                     tmpxy   = dstxinc;
6960                     dstxinc = dstyinc;
6961                     dstyinc = tmpxy;
6962                     dstxinc = dstxinc * -1;
6963                 }
6964                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6965                 {
6966                     tmp          = dTopLeft;
6967                     dTopLeft     = dTopRight;
6968                     dTopRight    = dBottomRight;
6969                     dBottomRight = dBottomLeft;
6970                     dBottomLeft  = tmp;
6971                     tmpxy   = dstxinc;
6972                     dstxinc = dstyinc;
6973                     dstyinc = tmpxy;
6974                     dstyinc = dstyinc * -1;
6975                 }
6976                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6977                 {
6978                     /* I don't think we need to do anything about this flag */
6979                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6980                 }
6981                 dbuf = dTopLeft;
6982                 flags &= ~(WINEDDBLT_DDFX);
6983             }
6984
6985 #define COPY_COLORKEY_FX(type) \
6986 do { \
6987     const type *s; \
6988     type *d = (type *)dbuf, *dx, tmp; \
6989     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6990     { \
6991         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6992         dx = d; \
6993         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6994         { \
6995             tmp = s[sx >> 16]; \
6996             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6997                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6998             { \
6999                 dx[0] = tmp; \
7000             } \
7001             dx = (type *)(((BYTE *)dx) + dstxinc); \
7002         } \
7003         d = (type *)(((BYTE *)d) + dstyinc); \
7004     } \
7005 } while(0)
7006
7007             switch (bpp)
7008             {
7009                 case 1:
7010                     COPY_COLORKEY_FX(BYTE);
7011                     break;
7012                 case 2:
7013                     COPY_COLORKEY_FX(WORD);
7014                     break;
7015                 case 4:
7016                     COPY_COLORKEY_FX(DWORD);
7017                     break;
7018                 case 3:
7019                 {
7020                     const BYTE *s;
7021                     BYTE *d = dbuf, *dx;
7022                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7023                     {
7024                         sbuf = sbase + (sy >> 16) * slock.Pitch;
7025                         dx = d;
7026                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7027                         {
7028                             DWORD pixel, dpixel = 0;
7029                             s = sbuf + 3 * (sx>>16);
7030                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7031                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7032                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7033                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7034                             {
7035                                 dx[0] = (pixel      ) & 0xff;
7036                                 dx[1] = (pixel >>  8) & 0xff;
7037                                 dx[2] = (pixel >> 16) & 0xff;
7038                             }
7039                             dx += dstxinc;
7040                         }
7041                         d += dstyinc;
7042                     }
7043                     break;
7044                 }
7045                 default:
7046                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7047                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7048                     hr = WINED3DERR_NOTAVAILABLE;
7049                     goto error;
7050 #undef COPY_COLORKEY_FX
7051             }
7052         }
7053     }
7054
7055 error:
7056     if (flags && FIXME_ON(d3d_surface))
7057     {
7058         FIXME("\tUnsupported flags: %#x.\n", flags);
7059     }
7060
7061 release:
7062     wined3d_surface_unmap(dst_surface);
7063     if (src_surface && src_surface != dst_surface)
7064         wined3d_surface_unmap(src_surface);
7065     /* Release the converted surface, if any. */
7066     if (src_surface && src_surface != orig_src)
7067         wined3d_surface_decref(src_surface);
7068
7069     return hr;
7070 }
7071
7072 static HRESULT surface_cpu_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
7073         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
7074 {
7075     const struct wined3d_format *src_format, *dst_format;
7076     RECT lock_src, lock_dst, lock_union;
7077     WINED3DLOCKED_RECT dlock, slock;
7078     HRESULT hr = WINED3D_OK;
7079     int bpp, w, h, x, y;
7080     const BYTE *sbuf;
7081     BYTE *dbuf;
7082     RECT rsrc2;
7083
7084     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
7085             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
7086
7087     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface->flags & SFLAG_LOCKED))
7088     {
7089         WARN(" Surface is busy, returning DDERR_SURFACEBUSY\n");
7090         return WINEDDERR_SURFACEBUSY;
7091     }
7092
7093     if (!src_rect)
7094     {
7095         WARN("src_rect is NULL!\n");
7096         rsrc2.left = 0;
7097         rsrc2.top = 0;
7098         rsrc2.right = src_surface->resource.width;
7099         rsrc2.bottom = src_surface->resource.height;
7100         src_rect = &rsrc2;
7101     }
7102
7103     /* Check source rect for validity. Copied from normal Blt. Fixes Baldur's Gate. */
7104     if ((src_rect->bottom > src_surface->resource.height) || (src_rect->bottom < 0)
7105             || (src_rect->top > src_surface->resource.height) || (src_rect->top < 0)
7106             || (src_rect->left > src_surface->resource.width) || (src_rect->left < 0)
7107             || (src_rect->right > src_surface->resource.width) || (src_rect->right < 0)
7108             || (src_rect->right < src_rect->left) || (src_rect->bottom < src_rect->top))
7109     {
7110         WARN("Application gave us bad source rectangle for BltFast.\n");
7111         return WINEDDERR_INVALIDRECT;
7112     }
7113
7114     h = src_rect->bottom - src_rect->top;
7115     if (h > dst_surface->resource.height - dst_y)
7116         h = dst_surface->resource.height - dst_y;
7117     if (h > src_surface->resource.height - src_rect->top)
7118         h = src_surface->resource.height - src_rect->top;
7119     if (h <= 0)
7120         return WINEDDERR_INVALIDRECT;
7121
7122     w = src_rect->right - src_rect->left;
7123     if (w > dst_surface->resource.width - dst_x)
7124         w = dst_surface->resource.width - dst_x;
7125     if (w > src_surface->resource.width - src_rect->left)
7126         w = src_surface->resource.width - src_rect->left;
7127     if (w <= 0)
7128         return WINEDDERR_INVALIDRECT;
7129
7130     /* Now compute the locking rectangle... */
7131     lock_src.left = src_rect->left;
7132     lock_src.top = src_rect->top;
7133     lock_src.right = lock_src.left + w;
7134     lock_src.bottom = lock_src.top + h;
7135
7136     lock_dst.left = dst_x;
7137     lock_dst.top = dst_y;
7138     lock_dst.right = dst_x + w;
7139     lock_dst.bottom = dst_y + h;
7140
7141     bpp = dst_surface->resource.format->byte_count;
7142
7143     /* We need to lock the surfaces, or we won't get refreshes when done. */
7144     if (src_surface == dst_surface)
7145     {
7146         int pitch;
7147
7148         UnionRect(&lock_union, &lock_src, &lock_dst);
7149
7150         /* Lock the union of the two rectangles. */
7151         hr = wined3d_surface_map(dst_surface, &dlock, &lock_union, 0);
7152         if (FAILED(hr))
7153             goto error;
7154
7155         pitch = dlock.Pitch;
7156         slock.Pitch = dlock.Pitch;
7157
7158         /* Since slock was originally copied from this surface's description, we can just reuse it. */
7159         sbuf = dst_surface->resource.allocatedMemory + lock_src.top * pitch + lock_src.left * bpp;
7160         dbuf = dst_surface->resource.allocatedMemory + lock_dst.top * pitch + lock_dst.left * bpp;
7161         src_format = src_surface->resource.format;
7162         dst_format = src_format;
7163     }
7164     else
7165     {
7166         hr = wined3d_surface_map(src_surface, &slock, &lock_src, WINED3DLOCK_READONLY);
7167         if (FAILED(hr))
7168             goto error;
7169         hr = wined3d_surface_map(dst_surface, &dlock, &lock_dst, 0);
7170         if (FAILED(hr))
7171             goto error;
7172
7173         sbuf = slock.pBits;
7174         dbuf = dlock.pBits;
7175         TRACE("Dst is at %p, Src is at %p.\n", dbuf, sbuf);
7176
7177         src_format = src_surface->resource.format;
7178         dst_format = dst_surface->resource.format;
7179     }
7180
7181     /* Handle compressed surfaces first... */
7182     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
7183     {
7184         UINT row_block_count;
7185
7186         TRACE("compressed -> compressed copy\n");
7187         if (trans)
7188             FIXME("trans arg not supported when a compressed surface is involved\n");
7189         if (dst_x || dst_y)
7190             FIXME("offset for destination surface is not supported\n");
7191         if (src_surface->resource.format->id != dst_surface->resource.format->id)
7192         {
7193             FIXME("compressed -> compressed copy only supported for the same type of surface\n");
7194             hr = WINED3DERR_WRONGTEXTUREFORMAT;
7195             goto error;
7196         }
7197
7198         row_block_count = (w + dst_format->block_width - 1) / dst_format->block_width;
7199         for (y = 0; y < h; y += dst_format->block_height)
7200         {
7201             memcpy(dbuf, sbuf, row_block_count * dst_format->block_byte_count);
7202             dbuf += dlock.Pitch;
7203             sbuf += slock.Pitch;
7204         }
7205
7206         goto error;
7207     }
7208     if ((src_format->flags & WINED3DFMT_FLAG_COMPRESSED) && !(dst_format->flags & WINED3DFMT_FLAG_COMPRESSED))
7209     {
7210         /* TODO: Use the libtxc_dxtn.so shared library to do software
7211          * decompression. */
7212         ERR("Software decompression not supported.\n");
7213         goto error;
7214     }
7215
7216     if (trans & (WINEDDBLTFAST_SRCCOLORKEY | WINEDDBLTFAST_DESTCOLORKEY))
7217     {
7218         DWORD keylow, keyhigh;
7219         DWORD mask = src_surface->resource.format->red_mask
7220                 | src_surface->resource.format->green_mask
7221                 | src_surface->resource.format->blue_mask;
7222
7223         /* For some 8-bit formats like L8 and P8 color masks don't make sense */
7224         if (!mask && bpp == 1)
7225             mask = 0xff;
7226
7227         TRACE("Color keyed copy.\n");
7228         if (trans & WINEDDBLTFAST_SRCCOLORKEY)
7229         {
7230             keylow = src_surface->SrcBltCKey.dwColorSpaceLowValue;
7231             keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
7232         }
7233         else
7234         {
7235             /* I'm not sure if this is correct. */
7236             FIXME("WINEDDBLTFAST_DESTCOLORKEY not fully supported yet.\n");
7237             keylow = dst_surface->DestBltCKey.dwColorSpaceLowValue;
7238             keyhigh = dst_surface->DestBltCKey.dwColorSpaceHighValue;
7239         }
7240
7241 #define COPYBOX_COLORKEY(type) \
7242 do { \
7243     const type *s = (const type *)sbuf; \
7244     type *d = (type *)dbuf; \
7245     type tmp; \
7246     for (y = 0; y < h; y++) \
7247     { \
7248         for (x = 0; x < w; x++) \
7249         { \
7250             tmp = s[x]; \
7251             if ((tmp & mask) < keylow || (tmp & mask) > keyhigh) d[x] = tmp; \
7252         } \
7253         s = (const type *)((const BYTE *)s + slock.Pitch); \
7254         d = (type *)((BYTE *)d + dlock.Pitch); \
7255     } \
7256 } while(0)
7257
7258         switch (bpp)
7259         {
7260             case 1:
7261                 COPYBOX_COLORKEY(BYTE);
7262                 break;
7263             case 2:
7264                 COPYBOX_COLORKEY(WORD);
7265                 break;
7266             case 4:
7267                 COPYBOX_COLORKEY(DWORD);
7268                 break;
7269             case 3:
7270             {
7271                 const BYTE *s;
7272                 DWORD tmp;
7273                 BYTE *d;
7274                 s = sbuf;
7275                 d = dbuf;
7276                 for (y = 0; y < h; ++y)
7277                 {
7278                     for (x = 0; x < w * 3; x += 3)
7279                     {
7280                         tmp = (DWORD)s[x] + ((DWORD)s[x + 1] << 8) + ((DWORD)s[x + 2] << 16);
7281                         if (tmp < keylow || tmp > keyhigh)
7282                         {
7283                             d[x + 0] = s[x + 0];
7284                             d[x + 1] = s[x + 1];
7285                             d[x + 2] = s[x + 2];
7286                         }
7287                     }
7288                     s += slock.Pitch;
7289                     d += dlock.Pitch;
7290                 }
7291                 break;
7292             }
7293             default:
7294                 FIXME("Source color key blitting not supported for bpp %u.\n", bpp * 8);
7295                 hr = WINED3DERR_NOTAVAILABLE;
7296                 goto error;
7297         }
7298 #undef COPYBOX_COLORKEY
7299         TRACE("Copy done.\n");
7300     }
7301     else
7302     {
7303         int width = w * bpp;
7304         INT sbufpitch, dbufpitch;
7305
7306         TRACE("No color key copy.\n");
7307         /* Handle overlapping surfaces. */
7308         if (sbuf < dbuf)
7309         {
7310             sbuf += (h - 1) * slock.Pitch;
7311             dbuf += (h - 1) * dlock.Pitch;
7312             sbufpitch = -slock.Pitch;
7313             dbufpitch = -dlock.Pitch;
7314         }
7315         else
7316         {
7317             sbufpitch = slock.Pitch;
7318             dbufpitch = dlock.Pitch;
7319         }
7320         for (y = 0; y < h; ++y)
7321         {
7322             /* This is pretty easy, a line for line memcpy. */
7323             memmove(dbuf, sbuf, width);
7324             sbuf += sbufpitch;
7325             dbuf += dbufpitch;
7326         }
7327         TRACE("Copy done.\n");
7328     }
7329
7330 error:
7331     if (src_surface == dst_surface)
7332     {
7333         wined3d_surface_unmap(dst_surface);
7334     }
7335     else
7336     {
7337         wined3d_surface_unmap(dst_surface);
7338         wined3d_surface_unmap(src_surface);
7339     }
7340
7341     return hr;
7342 }
7343
7344 /* Do not call while under the GL lock. */
7345 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7346         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7347 {
7348     WINEDDBLTFX BltFx;
7349
7350     memset(&BltFx, 0, sizeof(BltFx));
7351     BltFx.dwSize = sizeof(BltFx);
7352     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface->resource.format, color);
7353     return wined3d_surface_blt(dst_surface, dst_rect, NULL, NULL,
7354             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7355 }
7356
7357 /* Do not call while under the GL lock. */
7358 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7359         struct wined3d_surface *surface, const RECT *rect, float depth)
7360 {
7361     FIXME("Depth filling not implemented by cpu_blit.\n");
7362     return WINED3DERR_INVALIDCALL;
7363 }
7364
7365 const struct blit_shader cpu_blit =  {
7366     cpu_blit_alloc,
7367     cpu_blit_free,
7368     cpu_blit_set,
7369     cpu_blit_unset,
7370     cpu_blit_supported,
7371     cpu_blit_color_fill,
7372     cpu_blit_depth_fill,
7373 };
7374
7375 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7376         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7377         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7378         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7379 {
7380     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7381     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7382     unsigned int resource_size;
7383     HRESULT hr;
7384
7385     if (multisample_quality > 0)
7386     {
7387         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7388         multisample_quality = 0;
7389     }
7390
7391     /* Quick lockable sanity check.
7392      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7393      * this function is too deep to need to care about things like this.
7394      * Levels need to be checked too, since they all affect what can be done. */
7395     switch (pool)
7396     {
7397         case WINED3DPOOL_SCRATCH:
7398             if (!lockable)
7399             {
7400                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7401                         "which are mutually exclusive, setting lockable to TRUE.\n");
7402                 lockable = TRUE;
7403             }
7404             break;
7405
7406         case WINED3DPOOL_SYSTEMMEM:
7407             if (!lockable)
7408                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7409             break;
7410
7411         case WINED3DPOOL_MANAGED:
7412             if (usage & WINED3DUSAGE_DYNAMIC)
7413                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7414             break;
7415
7416         case WINED3DPOOL_DEFAULT:
7417             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7418                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7419             break;
7420
7421         default:
7422             FIXME("Unknown pool %#x.\n", pool);
7423             break;
7424     };
7425
7426     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7427         FIXME("Trying to create a render target that isn't in the default pool.\n");
7428
7429     /* FIXME: Check that the format is supported by the device. */
7430
7431     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7432     if (!resource_size)
7433         return WINED3DERR_INVALIDCALL;
7434
7435     surface->surface_type = surface_type;
7436
7437     switch (surface_type)
7438     {
7439         case SURFACE_OPENGL:
7440             surface->surface_ops = &surface_ops;
7441             break;
7442
7443         case SURFACE_GDI:
7444             surface->surface_ops = &gdi_surface_ops;
7445             break;
7446
7447         default:
7448             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7449             return WINED3DERR_INVALIDCALL;
7450     }
7451
7452     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7453             multisample_type, multisample_quality, usage, pool, width, height, 1,
7454             resource_size, parent, parent_ops, &surface_resource_ops);
7455     if (FAILED(hr))
7456     {
7457         WARN("Failed to initialize resource, returning %#x.\n", hr);
7458         return hr;
7459     }
7460
7461     /* "Standalone" surface. */
7462     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7463
7464     surface->texture_level = level;
7465     list_init(&surface->overlays);
7466
7467     /* Flags */
7468     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7469     if (discard)
7470         surface->flags |= SFLAG_DISCARD;
7471     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7472         surface->flags |= SFLAG_LOCKABLE;
7473     /* I'm not sure if this qualifies as a hack or as an optimization. It
7474      * seems reasonable to assume that lockable render targets will get
7475      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7476      * creation. However, the other reason we want to do this is that several
7477      * ddraw applications access surface memory while the surface isn't
7478      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7479      * future locks prevents these from crashing. */
7480     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7481         surface->flags |= SFLAG_DYNLOCK;
7482
7483     /* Mark the texture as dirty so that it gets loaded first time around. */
7484     surface_add_dirty_rect(surface, NULL);
7485     list_init(&surface->renderbuffers);
7486
7487     TRACE("surface %p, memory %p, size %u\n",
7488             surface, surface->resource.allocatedMemory, surface->resource.size);
7489
7490     /* Call the private setup routine */
7491     hr = surface->surface_ops->surface_private_setup(surface);
7492     if (FAILED(hr))
7493     {
7494         ERR("Private setup failed, returning %#x\n", hr);
7495         surface->surface_ops->surface_cleanup(surface);
7496         return hr;
7497     }
7498
7499     return hr;
7500 }
7501
7502 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7503         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7504         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7505         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7506 {
7507     struct wined3d_surface *object;
7508     HRESULT hr;
7509
7510     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7511             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7512     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7513             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7514     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7515
7516     if (surface_type == SURFACE_OPENGL && !device->adapter)
7517     {
7518         ERR("OpenGL surfaces are not available without OpenGL.\n");
7519         return WINED3DERR_NOTAVAILABLE;
7520     }
7521
7522     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7523     if (!object)
7524     {
7525         ERR("Failed to allocate surface memory.\n");
7526         return WINED3DERR_OUTOFVIDEOMEMORY;
7527     }
7528
7529     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7530             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7531     if (FAILED(hr))
7532     {
7533         WARN("Failed to initialize surface, returning %#x.\n", hr);
7534         HeapFree(GetProcessHeap(), 0, object);
7535         return hr;
7536     }
7537
7538     TRACE("Created surface %p.\n", object);
7539     *surface = object;
7540
7541     return hr;
7542 }