wined3d: Don't set GL_MAP_UNSYNCHRONIZED_BIT for WINED3D_BUFFER_DISCARD maps.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT surface_cpu_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans);
41 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
42         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
43         WINED3DTEXTUREFILTERTYPE filter);
44
45 static void surface_cleanup(struct wined3d_surface *surface)
46 {
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO) || !list_empty(&surface->renderbuffers))
50     {
51         struct wined3d_renderbuffer_entry *entry, *entry2;
52         const struct wined3d_gl_info *gl_info;
53         struct wined3d_context *context;
54
55         context = context_acquire(surface->resource.device, NULL);
56         gl_info = context->gl_info;
57
58         ENTER_GL();
59
60         if (surface->texture_name)
61         {
62             TRACE("Deleting texture %u.\n", surface->texture_name);
63             glDeleteTextures(1, &surface->texture_name);
64         }
65
66         if (surface->flags & SFLAG_PBO)
67         {
68             TRACE("Deleting PBO %u.\n", surface->pbo);
69             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
70         }
71
72         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
73         {
74             TRACE("Deleting renderbuffer %u.\n", entry->id);
75             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
76             HeapFree(GetProcessHeap(), 0, entry);
77         }
78
79         LEAVE_GL();
80
81         context_release(context);
82     }
83
84     if (surface->flags & SFLAG_DIBSECTION)
85     {
86         /* Release the DC. */
87         SelectObject(surface->hDC, surface->dib.holdbitmap);
88         DeleteDC(surface->hDC);
89         /* Release the DIB section. */
90         DeleteObject(surface->dib.DIBsection);
91         surface->dib.bitmap_data = NULL;
92         surface->resource.allocatedMemory = NULL;
93     }
94
95     if (surface->flags & SFLAG_USERPTR)
96         wined3d_surface_set_mem(surface, NULL);
97     if (surface->overlay_dest)
98         list_remove(&surface->overlay_entry);
99
100     HeapFree(GetProcessHeap(), 0, surface->palette9);
101
102     resource_cleanup(&surface->resource);
103 }
104
105 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
106 {
107     TRACE("surface %p, container %p.\n", surface, container);
108
109     if (!container && type != WINED3D_CONTAINER_NONE)
110         ERR("Setting NULL container of type %#x.\n", type);
111
112     if (type == WINED3D_CONTAINER_SWAPCHAIN)
113     {
114         surface->get_drawable_size = get_drawable_size_swapchain;
115     }
116     else
117     {
118         switch (wined3d_settings.offscreen_rendering_mode)
119         {
120             case ORM_FBO:
121                 surface->get_drawable_size = get_drawable_size_fbo;
122                 break;
123
124             case ORM_BACKBUFFER:
125                 surface->get_drawable_size = get_drawable_size_backbuffer;
126                 break;
127
128             default:
129                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
130                 return;
131         }
132     }
133
134     surface->container.type = type;
135     surface->container.u.base = container;
136 }
137
138 struct blt_info
139 {
140     GLenum binding;
141     GLenum bind_target;
142     enum tex_types tex_type;
143     GLfloat coords[4][3];
144 };
145
/* Floating point rectangle; cube_coords_float() stores the left, top, right
 * and bottom edges of a RECT here after mapping them to the -1.0 .. 1.0 range. */
struct float_rect
{
    float l;    /* left */
    float t;    /* top */
    float r;    /* right */
    float b;    /* bottom */
};
153
154 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
155 {
156     f->l = ((r->left * 2.0f) / w) - 1.0f;
157     f->t = ((r->top * 2.0f) / h) - 1.0f;
158     f->r = ((r->right * 2.0f) / w) - 1.0f;
159     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
160 }
161
162 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
163 {
164     GLfloat (*coords)[3] = info->coords;
165     struct float_rect f;
166
167     switch (target)
168     {
169         default:
170             FIXME("Unsupported texture target %#x\n", target);
171             /* Fall back to GL_TEXTURE_2D */
172         case GL_TEXTURE_2D:
173             info->binding = GL_TEXTURE_BINDING_2D;
174             info->bind_target = GL_TEXTURE_2D;
175             info->tex_type = tex_2d;
176             coords[0][0] = (float)rect->left / w;
177             coords[0][1] = (float)rect->top / h;
178             coords[0][2] = 0.0f;
179
180             coords[1][0] = (float)rect->right / w;
181             coords[1][1] = (float)rect->top / h;
182             coords[1][2] = 0.0f;
183
184             coords[2][0] = (float)rect->left / w;
185             coords[2][1] = (float)rect->bottom / h;
186             coords[2][2] = 0.0f;
187
188             coords[3][0] = (float)rect->right / w;
189             coords[3][1] = (float)rect->bottom / h;
190             coords[3][2] = 0.0f;
191             break;
192
193         case GL_TEXTURE_RECTANGLE_ARB:
194             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
195             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
196             info->tex_type = tex_rect;
197             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
198             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
199             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
200             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
201             break;
202
203         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
204             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
205             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
206             info->tex_type = tex_cube;
207             cube_coords_float(rect, w, h, &f);
208
209             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
210             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
211             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
212             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
213             break;
214
215         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
216             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
217             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
218             info->tex_type = tex_cube;
219             cube_coords_float(rect, w, h, &f);
220
221             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
222             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
223             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
224             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
225             break;
226
227         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
228             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
229             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
230             info->tex_type = tex_cube;
231             cube_coords_float(rect, w, h, &f);
232
233             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
234             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
235             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
236             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
237             break;
238
239         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
240             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
241             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
242             info->tex_type = tex_cube;
243             cube_coords_float(rect, w, h, &f);
244
245             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
246             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
247             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
248             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
249             break;
250
251         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
252             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
253             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
254             info->tex_type = tex_cube;
255             cube_coords_float(rect, w, h, &f);
256
257             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
258             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
259             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
260             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
261             break;
262
263         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
264             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
265             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
266             info->tex_type = tex_cube;
267             cube_coords_float(rect, w, h, &f);
268
269             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
270             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
271             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
272             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
273             break;
274     }
275 }
276
277 static inline void surface_get_rect(struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
278 {
279     if (rect_in)
280         *rect_out = *rect_in;
281     else
282     {
283         rect_out->left = 0;
284         rect_out->top = 0;
285         rect_out->right = surface->resource.width;
286         rect_out->bottom = surface->resource.height;
287     }
288 }
289
290 /* GL locking and context activation is done by the caller */
291 void draw_textured_quad(const struct wined3d_surface *src_surface, const RECT *src_rect,
292         const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
293 {
294     struct blt_info info;
295
296     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
297
298     glEnable(info.bind_target);
299     checkGLcall("glEnable(bind_target)");
300
301     /* Bind the texture */
302     glBindTexture(info.bind_target, src_surface->texture_name);
303     checkGLcall("glBindTexture");
304
305     /* Filtering for StretchRect */
306     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
307             wined3d_gl_mag_filter(magLookup, Filter));
308     checkGLcall("glTexParameteri");
309     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
310             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
311     checkGLcall("glTexParameteri");
312     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
313     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
314     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
315     checkGLcall("glTexEnvi");
316
317     /* Draw a quad */
318     glBegin(GL_TRIANGLE_STRIP);
319     glTexCoord3fv(info.coords[0]);
320     glVertex2i(dst_rect->left, dst_rect->top);
321
322     glTexCoord3fv(info.coords[1]);
323     glVertex2i(dst_rect->right, dst_rect->top);
324
325     glTexCoord3fv(info.coords[2]);
326     glVertex2i(dst_rect->left, dst_rect->bottom);
327
328     glTexCoord3fv(info.coords[3]);
329     glVertex2i(dst_rect->right, dst_rect->bottom);
330     glEnd();
331
332     /* Unbind the texture */
333     glBindTexture(info.bind_target, 0);
334     checkGLcall("glBindTexture(info->bind_target, 0)");
335
336     /* We changed the filtering settings on the texture. Inform the
337      * container about this to get the filters reset properly next draw. */
338     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
339     {
340         struct wined3d_texture *texture = src_surface->container.u.texture;
341         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
342         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
343         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
344     }
345 }
346
347 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
348 {
349     const struct wined3d_format *format = surface->resource.format;
350     SYSTEM_INFO sysInfo;
351     BITMAPINFO *b_info;
352     int extraline = 0;
353     DWORD *masks;
354     UINT usage;
355     HDC dc;
356
357     TRACE("surface %p.\n", surface);
358
359     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
360     {
361         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
362         return WINED3DERR_INVALIDCALL;
363     }
364
365     switch (format->byte_count)
366     {
367         case 2:
368         case 4:
369             /* Allocate extra space to store the RGB bit masks. */
370             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
371             break;
372
373         case 3:
374             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
375             break;
376
377         default:
378             /* Allocate extra space for a palette. */
379             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
380                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
381             break;
382     }
383
384     if (!b_info)
385         return E_OUTOFMEMORY;
386
387     /* Some applications access the surface in via DWORDs, and do not take
388      * the necessary care at the end of the surface. So we need at least
389      * 4 extra bytes at the end of the surface. Check against the page size,
390      * if the last page used for the surface has at least 4 spare bytes we're
391      * safe, otherwise add an extra line to the DIB section. */
392     GetSystemInfo(&sysInfo);
393     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
394     {
395         extraline = 1;
396         TRACE("Adding an extra line to the DIB section.\n");
397     }
398
399     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
400     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
401     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
402     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
403     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
404             * wined3d_surface_get_pitch(surface);
405     b_info->bmiHeader.biPlanes = 1;
406     b_info->bmiHeader.biBitCount = format->byte_count * 8;
407
408     b_info->bmiHeader.biXPelsPerMeter = 0;
409     b_info->bmiHeader.biYPelsPerMeter = 0;
410     b_info->bmiHeader.biClrUsed = 0;
411     b_info->bmiHeader.biClrImportant = 0;
412
413     /* Get the bit masks */
414     masks = (DWORD *)b_info->bmiColors;
415     switch (surface->resource.format->id)
416     {
417         case WINED3DFMT_B8G8R8_UNORM:
418             usage = DIB_RGB_COLORS;
419             b_info->bmiHeader.biCompression = BI_RGB;
420             break;
421
422         case WINED3DFMT_B5G5R5X1_UNORM:
423         case WINED3DFMT_B5G5R5A1_UNORM:
424         case WINED3DFMT_B4G4R4A4_UNORM:
425         case WINED3DFMT_B4G4R4X4_UNORM:
426         case WINED3DFMT_B2G3R3_UNORM:
427         case WINED3DFMT_B2G3R3A8_UNORM:
428         case WINED3DFMT_R10G10B10A2_UNORM:
429         case WINED3DFMT_R8G8B8A8_UNORM:
430         case WINED3DFMT_R8G8B8X8_UNORM:
431         case WINED3DFMT_B10G10R10A2_UNORM:
432         case WINED3DFMT_B5G6R5_UNORM:
433         case WINED3DFMT_R16G16B16A16_UNORM:
434             usage = 0;
435             b_info->bmiHeader.biCompression = BI_BITFIELDS;
436             masks[0] = format->red_mask;
437             masks[1] = format->green_mask;
438             masks[2] = format->blue_mask;
439             break;
440
441         default:
442             /* Don't know palette */
443             b_info->bmiHeader.biCompression = BI_RGB;
444             usage = 0;
445             break;
446     }
447
448     if (!(dc = GetDC(0)))
449     {
450         HeapFree(GetProcessHeap(), 0, b_info);
451         return HRESULT_FROM_WIN32(GetLastError());
452     }
453
454     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
455             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
456             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
457     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
458     ReleaseDC(0, dc);
459
460     if (!surface->dib.DIBsection)
461     {
462         ERR("Failed to create DIB section.\n");
463         HeapFree(GetProcessHeap(), 0, b_info);
464         return HRESULT_FROM_WIN32(GetLastError());
465     }
466
467     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
468     /* Copy the existing surface to the dib section. */
469     if (surface->resource.allocatedMemory)
470     {
471         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
472                 surface->resource.height * wined3d_surface_get_pitch(surface));
473     }
474     else
475     {
476         /* This is to make maps read the GL texture although memory is allocated. */
477         surface->flags &= ~SFLAG_INSYSMEM;
478     }
479     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
480
481     HeapFree(GetProcessHeap(), 0, b_info);
482
483     /* Now allocate a DC. */
484     surface->hDC = CreateCompatibleDC(0);
485     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
486     TRACE("Using wined3d palette %p.\n", surface->palette);
487     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
488
489     surface->flags |= SFLAG_DIBSECTION;
490
491     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
492     surface->resource.heapMemory = NULL;
493
494     return WINED3D_OK;
495 }
496
497 static void surface_prepare_system_memory(struct wined3d_surface *surface)
498 {
499     struct wined3d_device *device = surface->resource.device;
500     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
501
502     TRACE("surface %p.\n", surface);
503
504     /* Performance optimization: Count how often a surface is locked, if it is
505      * locked regularly do not throw away the system memory copy. This avoids
506      * the need to download the surface from OpenGL all the time. The surface
507      * is still downloaded if the OpenGL texture is changed. */
508     if (!(surface->flags & SFLAG_DYNLOCK))
509     {
510         if (++surface->lockCount > MAXLOCKCOUNT)
511         {
512             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
513             surface->flags |= SFLAG_DYNLOCK;
514         }
515     }
516
517     /* Create a PBO for dynamically locked surfaces but don't do it for
518      * converted or NPOT surfaces. Also don't create a PBO for systemmem
519      * surfaces. */
520     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
521             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
522             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
523     {
524         struct wined3d_context *context;
525         GLenum error;
526
527         context = context_acquire(device, NULL);
528         ENTER_GL();
529
530         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
531         error = glGetError();
532         if (!surface->pbo || error != GL_NO_ERROR)
533             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
534
535         TRACE("Binding PBO %u.\n", surface->pbo);
536
537         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
538         checkGLcall("glBindBufferARB");
539
540         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
541                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
542         checkGLcall("glBufferDataARB");
543
544         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
545         checkGLcall("glBindBufferARB");
546
547         /* We don't need the system memory anymore and we can't even use it for PBOs. */
548         if (!(surface->flags & SFLAG_CLIENT))
549         {
550             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
551             surface->resource.heapMemory = NULL;
552         }
553         surface->resource.allocatedMemory = NULL;
554         surface->flags |= SFLAG_PBO;
555         LEAVE_GL();
556         context_release(context);
557     }
558     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
559     {
560         /* Whatever surface we have, make sure that there is memory allocated
561          * for the downloaded copy, or a PBO to map. */
562         if (!surface->resource.heapMemory)
563             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
564
565         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
566                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
567
568         if (surface->flags & SFLAG_INSYSMEM)
569             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
570     }
571 }
572
573 static void surface_evict_sysmem(struct wined3d_surface *surface)
574 {
575     if (surface->flags & SFLAG_DONOTFREE)
576         return;
577
578     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
579     surface->resource.allocatedMemory = NULL;
580     surface->resource.heapMemory = NULL;
581     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
582 }
583
584 /* Context activation is done by the caller. */
585 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
586         const struct wined3d_gl_info *gl_info, BOOL srgb)
587 {
588     struct wined3d_device *device = surface->resource.device;
589     DWORD active_sampler;
590     GLint active_texture;
591
592     /* We don't need a specific texture unit, but after binding the texture
593      * the current unit is dirty. Read the unit back instead of switching to
594      * 0, this avoids messing around with the state manager's GL states. The
595      * current texture unit should always be a valid one.
596      *
597      * To be more specific, this is tricky because we can implicitly be
598      * called from sampler() in state.c. This means we can't touch anything
599      * other than whatever happens to be the currently active texture, or we
600      * would risk marking already applied sampler states dirty again.
601      *
602      * TODO: Track the current active texture per GL context instead of using
603      * glGet(). */
604
605     ENTER_GL();
606     glGetIntegerv(GL_ACTIVE_TEXTURE, &active_texture);
607     LEAVE_GL();
608     active_sampler = device->rev_tex_unit_map[active_texture - GL_TEXTURE0_ARB];
609
610     if (active_sampler != WINED3D_UNMAPPED_STAGE)
611         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
612     surface_bind(surface, gl_info, srgb);
613 }
614
615 static void surface_force_reload(struct wined3d_surface *surface)
616 {
617     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
618 }
619
620 static void surface_release_client_storage(struct wined3d_surface *surface)
621 {
622     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
623
624     ENTER_GL();
625     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
626     if (surface->texture_name)
627     {
628         surface_bind_and_dirtify(surface, context->gl_info, FALSE);
629         glTexImage2D(surface->texture_target, surface->texture_level,
630                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
631     }
632     if (surface->texture_name_srgb)
633     {
634         surface_bind_and_dirtify(surface, context->gl_info, TRUE);
635         glTexImage2D(surface->texture_target, surface->texture_level,
636                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
637     }
638     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
639     LEAVE_GL();
640
641     context_release(context);
642
643     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
644     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
645     surface_force_reload(surface);
646 }
647
648 static HRESULT surface_private_setup(struct wined3d_surface *surface)
649 {
650     /* TODO: Check against the maximum texture sizes supported by the video card. */
651     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
652     unsigned int pow2Width, pow2Height;
653
654     TRACE("surface %p.\n", surface);
655
656     surface->texture_name = 0;
657     surface->texture_target = GL_TEXTURE_2D;
658
659     /* Non-power2 support */
660     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
661     {
662         pow2Width = surface->resource.width;
663         pow2Height = surface->resource.height;
664     }
665     else
666     {
667         /* Find the nearest pow2 match */
668         pow2Width = pow2Height = 1;
669         while (pow2Width < surface->resource.width)
670             pow2Width <<= 1;
671         while (pow2Height < surface->resource.height)
672             pow2Height <<= 1;
673     }
674     surface->pow2Width = pow2Width;
675     surface->pow2Height = pow2Height;
676
677     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
678     {
679         /* TODO: Add support for non power two compressed textures. */
680         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
681         {
682             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
683                   surface, surface->resource.width, surface->resource.height);
684             return WINED3DERR_NOTAVAILABLE;
685         }
686     }
687
688     if (pow2Width != surface->resource.width
689             || pow2Height != surface->resource.height)
690     {
691         surface->flags |= SFLAG_NONPOW2;
692     }
693
694     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
695             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
696     {
697         /* One of three options:
698          * 1: Do the same as we do with NPOT and scale the texture, (any
699          *    texture ops would require the texture to be scaled which is
700          *    potentially slow)
701          * 2: Set the texture to the maximum size (bad idea).
702          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
703          * 4: Create the surface, but allow it to be used only for DirectDraw
704          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
705          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
706          *    the render target. */
707         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
708         {
709             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
710             return WINED3DERR_NOTAVAILABLE;
711         }
712
713         /* We should never use this surface in combination with OpenGL! */
714         TRACE("Creating an oversized surface: %ux%u.\n",
715                 surface->pow2Width, surface->pow2Height);
716     }
717     else
718     {
719         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
720          * and EXT_PALETTED_TEXTURE is used in combination with texture
721          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
722          * EXT_PALETTED_TEXTURE doesn't work in combination with
723          * ARB_TEXTURE_RECTANGLE. */
724         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
725                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
726                 && gl_info->supported[EXT_PALETTED_TEXTURE]
727                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
728         {
729             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
730             surface->pow2Width = surface->resource.width;
731             surface->pow2Height = surface->resource.height;
732             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
733         }
734     }
735
736     switch (wined3d_settings.offscreen_rendering_mode)
737     {
738         case ORM_FBO:
739             surface->get_drawable_size = get_drawable_size_fbo;
740             break;
741
742         case ORM_BACKBUFFER:
743             surface->get_drawable_size = get_drawable_size_backbuffer;
744             break;
745
746         default:
747             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
748             return WINED3DERR_INVALIDCALL;
749     }
750
751     surface->flags |= SFLAG_INSYSMEM;
752
753     return WINED3D_OK;
754 }
755
756 static void surface_realize_palette(struct wined3d_surface *surface)
757 {
758     struct wined3d_palette *palette = surface->palette;
759
760     TRACE("surface %p.\n", surface);
761
762     if (!palette) return;
763
764     if (surface->resource.format->id == WINED3DFMT_P8_UINT
765             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
766     {
767         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
768         {
769             /* Make sure the texture is up to date. This call doesn't do
770              * anything if the texture is already up to date. */
771             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
772
773             /* We want to force a palette refresh, so mark the drawable as not being up to date */
774             if (!surface_is_offscreen(surface))
775                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
776         }
777         else
778         {
779             if (!(surface->flags & SFLAG_INSYSMEM))
780             {
781                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
782                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
783             }
784             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
785         }
786     }
787
788     if (surface->flags & SFLAG_DIBSECTION)
789     {
790         RGBQUAD col[256];
791         unsigned int i;
792
793         TRACE("Updating the DC's palette.\n");
794
795         for (i = 0; i < 256; ++i)
796         {
797             col[i].rgbRed   = palette->palents[i].peRed;
798             col[i].rgbGreen = palette->palents[i].peGreen;
799             col[i].rgbBlue  = palette->palents[i].peBlue;
800             col[i].rgbReserved = 0;
801         }
802         SetDIBColorTable(surface->hDC, 0, 256, col);
803     }
804
805     /* Propagate the changes to the drawable when we have a palette. */
806     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
807         surface_load_location(surface, SFLAG_INDRAWABLE, NULL);
808 }
809
810 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
811 {
812     HRESULT hr;
813
814     /* If there's no destination surface there is nothing to do. */
815     if (!surface->overlay_dest)
816         return WINED3D_OK;
817
818     /* Blt calls ModifyLocation on the dest surface, which in turn calls
819      * DrawOverlay to update the overlay. Prevent an endless recursion. */
820     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
821         return WINED3D_OK;
822
823     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
824     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
825             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
826     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
827
828     return hr;
829 }
830
831 static void surface_preload(struct wined3d_surface *surface)
832 {
833     TRACE("surface %p.\n", surface);
834
835     surface_internal_preload(surface, SRGB_ANY);
836 }
837
838 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
839 {
840     struct wined3d_device *device = surface->resource.device;
841     const RECT *pass_rect = rect;
842
843     TRACE("surface %p, rect %s, flags %#x.\n",
844             surface, wine_dbgstr_rect(rect), flags);
845
846     if (flags & WINED3DLOCK_DISCARD)
847     {
848         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
849         surface_prepare_system_memory(surface);
850         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
851     }
852     else
853     {
854         /* surface_load_location() does not check if the rectangle specifies
855          * the full surface. Most callers don't need that, so do it here. */
856         if (rect && !rect->top && !rect->left
857                 && rect->right == surface->resource.width
858                 && rect->bottom == surface->resource.height)
859             pass_rect = NULL;
860
861         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
862                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
863                 || surface == device->fb.render_targets[0])))
864             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
865     }
866
867     if (surface->flags & SFLAG_PBO)
868     {
869         const struct wined3d_gl_info *gl_info;
870         struct wined3d_context *context;
871
872         context = context_acquire(device, NULL);
873         gl_info = context->gl_info;
874
875         ENTER_GL();
876         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
877         checkGLcall("glBindBufferARB");
878
879         /* This shouldn't happen but could occur if some other function
880          * didn't handle the PBO properly. */
881         if (surface->resource.allocatedMemory)
882             ERR("The surface already has PBO memory allocated.\n");
883
884         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
885         checkGLcall("glMapBufferARB");
886
887         /* Make sure the PBO isn't set anymore in order not to break non-PBO
888          * calls. */
889         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
890         checkGLcall("glBindBufferARB");
891
892         LEAVE_GL();
893         context_release(context);
894     }
895
896     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
897     {
898         if (!rect)
899             surface_add_dirty_rect(surface, NULL);
900         else
901         {
902             WINED3DBOX b;
903
904             b.Left = rect->left;
905             b.Top = rect->top;
906             b.Right = rect->right;
907             b.Bottom = rect->bottom;
908             b.Front = 0;
909             b.Back = 1;
910             surface_add_dirty_rect(surface, &b);
911         }
912     }
913 }
914
915 static void surface_unmap(struct wined3d_surface *surface)
916 {
917     struct wined3d_device *device = surface->resource.device;
918     BOOL fullsurface;
919
920     TRACE("surface %p.\n", surface);
921
922     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
923
924     if (surface->flags & SFLAG_PBO)
925     {
926         const struct wined3d_gl_info *gl_info;
927         struct wined3d_context *context;
928
929         TRACE("Freeing PBO memory.\n");
930
931         context = context_acquire(device, NULL);
932         gl_info = context->gl_info;
933
934         ENTER_GL();
935         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
936         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
937         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
938         checkGLcall("glUnmapBufferARB");
939         LEAVE_GL();
940         context_release(context);
941
942         surface->resource.allocatedMemory = NULL;
943     }
944
945     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
946
947     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
948     {
949         TRACE("Not dirtified, nothing to do.\n");
950         goto done;
951     }
952
953     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
954             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
955     {
956         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
957         {
958             static BOOL warned = FALSE;
959             if (!warned)
960             {
961                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
962                 warned = TRUE;
963             }
964             goto done;
965         }
966
967         if (!surface->dirtyRect.left && !surface->dirtyRect.top
968                 && surface->dirtyRect.right == surface->resource.width
969                 && surface->dirtyRect.bottom == surface->resource.height)
970         {
971             fullsurface = TRUE;
972         }
973         else
974         {
975             /* TODO: Proper partial rectangle tracking. */
976             fullsurface = FALSE;
977             surface->flags |= SFLAG_INSYSMEM;
978         }
979
980         surface_load_location(surface, SFLAG_INDRAWABLE, fullsurface ? NULL : &surface->dirtyRect);
981
982         /* Partial rectangle tracking is not commonly implemented, it is only
983          * done for render targets. INSYSMEM was set before to tell
984          * surface_load_location() where to read the rectangle from.
985          * Indrawable is set because all modifications from the partial
986          * sysmem copy are written back to the drawable, thus the surface is
987          * merged again in the drawable. The sysmem copy is not fully up to
988          * date because only a subrectangle was read in Map(). */
989         if (!fullsurface)
990         {
991             surface_modify_location(surface, SFLAG_INDRAWABLE, TRUE);
992             surface_evict_sysmem(surface);
993         }
994
995         surface->dirtyRect.left = surface->resource.width;
996         surface->dirtyRect.top = surface->resource.height;
997         surface->dirtyRect.right = 0;
998         surface->dirtyRect.bottom = 0;
999     }
1000     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1001     {
1002         FIXME("Depth / stencil buffer locking is not implemented.\n");
1003     }
1004
1005 done:
1006     /* Overlays have to be redrawn manually after changes with the GL implementation */
1007     if (surface->overlay_dest)
1008         surface->surface_ops->surface_draw_overlay(surface);
1009 }
1010
1011 static HRESULT surface_getdc(struct wined3d_surface *surface)
1012 {
1013     WINED3DLOCKED_RECT lock;
1014     HRESULT hr;
1015
1016     TRACE("surface %p.\n", surface);
1017
1018     /* Create a DIB section if there isn't a dc yet. */
1019     if (!surface->hDC)
1020     {
1021         if (surface->flags & SFLAG_CLIENT)
1022         {
1023             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1024             surface_release_client_storage(surface);
1025         }
1026         hr = surface_create_dib_section(surface);
1027         if (FAILED(hr))
1028             return WINED3DERR_INVALIDCALL;
1029
1030         /* Use the DIB section from now on if we are not using a PBO. */
1031         if (!(surface->flags & SFLAG_PBO))
1032             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1033     }
1034
1035     /* Map the surface. */
1036     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1037     if (FAILED(hr))
1038         ERR("Map failed, hr %#x.\n", hr);
1039
1040     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1041      * activates the allocatedMemory. */
1042     if (surface->flags & SFLAG_PBO)
1043         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->dib.bitmap_size);
1044
1045     return hr;
1046 }
1047
1048 static HRESULT surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1049 {
1050     TRACE("surface %p, override %p.\n", surface, override);
1051
1052     /* Flipping is only supported on render targets and overlays. */
1053     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
1054     {
1055         WARN("Tried to flip a non-render target, non-overlay surface.\n");
1056         return WINEDDERR_NOTFLIPPABLE;
1057     }
1058
1059     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1060     {
1061         flip_surface(surface, override);
1062
1063         /* Update the overlay if it is visible */
1064         if (surface->overlay_dest)
1065             return surface->surface_ops->surface_draw_overlay(surface);
1066         else
1067             return WINED3D_OK;
1068     }
1069
1070     return WINED3D_OK;
1071 }
1072
1073 static BOOL surface_is_full_rect(struct wined3d_surface *surface, const RECT *r)
1074 {
1075     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1076         return FALSE;
1077     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1078         return FALSE;
1079     return TRUE;
1080 }
1081
1082 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1083         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1084 {
1085     const struct wined3d_gl_info *gl_info;
1086     struct wined3d_context *context;
1087     DWORD src_mask, dst_mask;
1088     GLbitfield gl_mask;
1089
1090     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1091             device, src_surface, wine_dbgstr_rect(src_rect),
1092             dst_surface, wine_dbgstr_rect(dst_rect));
1093
1094     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1095     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1096
1097     if (src_mask != dst_mask)
1098     {
1099         ERR("Incompatible formats %s and %s.\n",
1100                 debug_d3dformat(src_surface->resource.format->id),
1101                 debug_d3dformat(dst_surface->resource.format->id));
1102         return;
1103     }
1104
1105     if (!src_mask)
1106     {
1107         ERR("Not a depth / stencil format: %s.\n",
1108                 debug_d3dformat(src_surface->resource.format->id));
1109         return;
1110     }
1111
1112     gl_mask = 0;
1113     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1114         gl_mask |= GL_DEPTH_BUFFER_BIT;
1115     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1116         gl_mask |= GL_STENCIL_BUFFER_BIT;
1117
1118     /* Make sure the locations are up-to-date. Loading the destination
1119      * surface isn't required if the entire surface is overwritten. */
1120     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1121     if (!surface_is_full_rect(dst_surface, dst_rect))
1122         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1123
1124     context = context_acquire(device, NULL);
1125     if (!context->valid)
1126     {
1127         context_release(context);
1128         WARN("Invalid context, skipping blit.\n");
1129         return;
1130     }
1131
1132     gl_info = context->gl_info;
1133
1134     ENTER_GL();
1135
1136     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1137     glReadBuffer(GL_NONE);
1138     checkGLcall("glReadBuffer()");
1139     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1140
1141     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1142     context_set_draw_buffer(context, GL_NONE);
1143     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1144
1145     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1146     {
1147         glDepthMask(GL_TRUE);
1148         device_invalidate_state(device, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1149     }
1150     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1151     {
1152         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1153         {
1154             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1155             device_invalidate_state(device, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1156         }
1157         glStencilMask(~0U);
1158         device_invalidate_state(device, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1159     }
1160
1161     glDisable(GL_SCISSOR_TEST);
1162     device_invalidate_state(device, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1163
1164     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1165             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1166     checkGLcall("glBlitFramebuffer()");
1167
1168     LEAVE_GL();
1169
1170     if (wined3d_settings.strict_draw_ordering)
1171         wglFlush(); /* Flush to ensure ordering across contexts. */
1172
1173     context_release(context);
1174 }
1175
1176 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1177         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1178         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1179 {
1180     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1181         return FALSE;
1182
1183     /* Source and/or destination need to be on the GL side */
1184     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1185         return FALSE;
1186
1187     switch (blit_op)
1188     {
1189         case WINED3D_BLIT_OP_COLOR_BLIT:
1190             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1191                 return FALSE;
1192             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1193                 return FALSE;
1194             break;
1195
1196         case WINED3D_BLIT_OP_DEPTH_BLIT:
1197             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1198                 return FALSE;
1199             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1200                 return FALSE;
1201             break;
1202
1203         default:
1204             return FALSE;
1205     }
1206
1207     if (!(src_format->id == dst_format->id
1208             || (is_identity_fixup(src_format->color_fixup)
1209             && is_identity_fixup(dst_format->color_fixup))))
1210         return FALSE;
1211
1212     return TRUE;
1213 }
1214
1215 static BOOL surface_convert_depth_to_float(struct wined3d_surface *surface, DWORD depth, float *float_depth)
1216 {
1217     const struct wined3d_format *format = surface->resource.format;
1218
1219     switch (format->id)
1220     {
1221         case WINED3DFMT_S1_UINT_D15_UNORM:
1222             *float_depth = depth / (float)0x00007fff;
1223             break;
1224
1225         case WINED3DFMT_D16_UNORM:
1226             *float_depth = depth / (float)0x0000ffff;
1227             break;
1228
1229         case WINED3DFMT_D24_UNORM_S8_UINT:
1230         case WINED3DFMT_X8D24_UNORM:
1231             *float_depth = depth / (float)0x00ffffff;
1232             break;
1233
1234         case WINED3DFMT_D32_UNORM:
1235             *float_depth = depth / (float)0xffffffff;
1236             break;
1237
1238         default:
1239             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1240             return FALSE;
1241     }
1242
1243     return TRUE;
1244 }
1245
1246 /* Do not call while under the GL lock. */
1247 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1248 {
1249     const struct wined3d_resource *resource = &surface->resource;
1250     struct wined3d_device *device = resource->device;
1251     const struct blit_shader *blitter;
1252
1253     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1254             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1255     if (!blitter)
1256     {
1257         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1258         return WINED3DERR_INVALIDCALL;
1259     }
1260
1261     return blitter->depth_fill(device, surface, rect, depth);
1262 }
1263
1264 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1265         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1266 {
1267     struct wined3d_device *device = src_surface->resource.device;
1268
1269     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1270             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1271             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1272         return WINED3DERR_INVALIDCALL;
1273
1274     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1275
1276     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1277             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1278     surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
1279
1280     return WINED3D_OK;
1281 }
1282
1283 /* Do not call while under the GL lock. */
1284 static HRESULT surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1285         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1286         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1287 {
1288     struct wined3d_device *device = dst_surface->resource.device;
1289     DWORD src_ds_flags, dst_ds_flags;
1290
1291     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1292             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1293             flags, fx, debug_d3dtexturefiltertype(filter));
1294     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1295
1296     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1297     {
1298         WARN(" Surface is busy, returning DDERR_SURFACEBUSY\n");
1299         return WINEDDERR_SURFACEBUSY;
1300     }
1301
1302     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1303     if (src_surface)
1304         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1305     else
1306         src_ds_flags = 0;
1307
1308     if (src_ds_flags || dst_ds_flags)
1309     {
1310         if (flags & WINEDDBLT_DEPTHFILL)
1311         {
1312             float depth;
1313             RECT rect;
1314
1315             TRACE("Depth fill.\n");
1316
1317             surface_get_rect(dst_surface, dst_rect_in, &rect);
1318
1319             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1320                 return WINED3DERR_INVALIDCALL;
1321
1322             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &rect, depth)))
1323                 return WINED3D_OK;
1324         }
1325         else
1326         {
1327             RECT src_rect, dst_rect;
1328
1329             /* Accessing depth / stencil surfaces is supposed to fail while in
1330              * a scene, except for fills, which seem to work. */
1331             if (device->inScene)
1332             {
1333                 WARN("Rejecting depth / stencil access while in scene.\n");
1334                 return WINED3DERR_INVALIDCALL;
1335             }
1336
1337             if (src_ds_flags != dst_ds_flags)
1338             {
1339                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1340                 return WINED3DERR_INVALIDCALL;
1341             }
1342
1343             if (src_rect_in && (src_rect_in->top || src_rect_in->left
1344                     || src_rect_in->bottom != src_surface->resource.height
1345                     || src_rect_in->right != src_surface->resource.width))
1346             {
1347                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1348                         wine_dbgstr_rect(src_rect_in));
1349                 return WINED3DERR_INVALIDCALL;
1350             }
1351
1352             if (dst_rect_in && (dst_rect_in->top || dst_rect_in->left
1353                     || dst_rect_in->bottom != dst_surface->resource.height
1354                     || dst_rect_in->right != dst_surface->resource.width))
1355             {
1356                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1357                         wine_dbgstr_rect(src_rect_in));
1358                 return WINED3DERR_INVALIDCALL;
1359             }
1360
1361             if (src_surface->resource.height != dst_surface->resource.height
1362                     || src_surface->resource.width != dst_surface->resource.width)
1363             {
1364                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1365                 return WINED3DERR_INVALIDCALL;
1366             }
1367
1368             surface_get_rect(src_surface, src_rect_in, &src_rect);
1369             surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1370
1371             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1372                 return WINED3D_OK;
1373         }
1374     }
1375
1376     /* Special cases for render targets. */
1377     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1378             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1379     {
1380         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, dst_rect_in,
1381                 src_surface, src_rect_in, flags, fx, filter)))
1382             return WINED3D_OK;
1383     }
1384
1385     /* For the rest call the X11 surface implementation. For render targets
1386      * this should be implemented OpenGL accelerated in BltOverride, other
1387      * blits are rather rare. */
1388     return surface_cpu_blt(dst_surface, dst_rect_in, src_surface, src_rect_in, flags, fx, filter);
1389 }
1390
1391 /* Do not call while under the GL lock. */
1392 static HRESULT surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1393         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1394 {
1395     struct wined3d_device *device = dst_surface->resource.device;
1396
1397     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
1398             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1399
1400     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface->flags & SFLAG_LOCKED))
1401     {
1402         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1403         return WINEDDERR_SURFACEBUSY;
1404     }
1405
1406     if (device->inScene && (dst_surface == device->fb.depth_stencil || src_surface == device->fb.depth_stencil))
1407     {
1408         WARN("Attempt to access the depth / stencil surface while in a scene.\n");
1409         return WINED3DERR_INVALIDCALL;
1410     }
1411
1412     /* Special cases for RenderTargets */
1413     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1414             || (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
1415     {
1416
1417         RECT src_rect, dst_rect;
1418         DWORD flags = 0;
1419
1420         surface_get_rect(src_surface, src_rect_in, &src_rect);
1421
1422         dst_rect.left = dst_x;
1423         dst_rect.top = dst_y;
1424         dst_rect.right = dst_x + src_rect.right - src_rect.left;
1425         dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1426
1427         /* Convert BltFast flags into Blt ones because BltOverride is called
1428          * from Blt as well. */
1429         if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1430             flags |= WINEDDBLT_KEYSRC;
1431         if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1432             flags |= WINEDDBLT_KEYDEST;
1433         if (trans & WINEDDBLTFAST_WAIT)
1434             flags |= WINEDDBLT_WAIT;
1435         if (trans & WINEDDBLTFAST_DONOTWAIT)
1436             flags |= WINEDDBLT_DONOTWAIT;
1437
1438         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface,
1439                 &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT)))
1440             return WINED3D_OK;
1441     }
1442
1443     return surface_cpu_bltfast(dst_surface, dst_x, dst_y, src_surface, src_rect_in, trans);
1444 }
1445
1446 static HRESULT surface_set_mem(struct wined3d_surface *surface, void *mem)
1447 {
1448     TRACE("surface %p, mem %p.\n", surface, mem);
1449
1450     if (mem && mem != surface->resource.allocatedMemory)
1451     {
1452         void *release = NULL;
1453
1454         /* Do I have to copy the old surface content? */
1455         if (surface->flags & SFLAG_DIBSECTION)
1456         {
1457             SelectObject(surface->hDC, surface->dib.holdbitmap);
1458             DeleteDC(surface->hDC);
1459             /* Release the DIB section. */
1460             DeleteObject(surface->dib.DIBsection);
1461             surface->dib.bitmap_data = NULL;
1462             surface->resource.allocatedMemory = NULL;
1463             surface->hDC = NULL;
1464             surface->flags &= ~SFLAG_DIBSECTION;
1465         }
1466         else if (!(surface->flags & SFLAG_USERPTR))
1467         {
1468             release = surface->resource.heapMemory;
1469             surface->resource.heapMemory = NULL;
1470         }
1471         surface->resource.allocatedMemory = mem;
1472         surface->flags |= SFLAG_USERPTR;
1473
1474         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
1475         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1476
1477         /* For client textures OpenGL has to be notified. */
1478         if (surface->flags & SFLAG_CLIENT)
1479             surface_release_client_storage(surface);
1480
1481         /* Now free the old memory if any. */
1482         HeapFree(GetProcessHeap(), 0, release);
1483     }
1484     else if (surface->flags & SFLAG_USERPTR)
1485     {
1486         /* Map and GetDC will re-create the dib section and allocated memory. */
1487         surface->resource.allocatedMemory = NULL;
1488         /* HeapMemory should be NULL already. */
1489         if (surface->resource.heapMemory)
1490             ERR("User pointer surface has heap memory allocated.\n");
1491         surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
1492
1493         if (surface->flags & SFLAG_CLIENT)
1494             surface_release_client_storage(surface);
1495
1496         surface_prepare_system_memory(surface);
1497         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1498     }
1499
1500     return WINED3D_OK;
1501 }
1502
1503 /* Context activation is done by the caller. */
1504 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1505 {
1506     if (!surface->resource.heapMemory)
1507     {
1508         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1509         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1510                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1511     }
1512
1513     ENTER_GL();
1514     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1515     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1516     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1517             surface->resource.size, surface->resource.allocatedMemory));
1518     checkGLcall("glGetBufferSubDataARB");
1519     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1520     checkGLcall("glDeleteBuffersARB");
1521     LEAVE_GL();
1522
1523     surface->pbo = 0;
1524     surface->flags &= ~SFLAG_PBO;
1525 }
1526
1527 /* Do not call while under the GL lock. */
1528 static void surface_unload(struct wined3d_resource *resource)
1529 {
1530     struct wined3d_surface *surface = surface_from_resource(resource);
1531     struct wined3d_renderbuffer_entry *entry, *entry2;
1532     struct wined3d_device *device = resource->device;
1533     const struct wined3d_gl_info *gl_info;
1534     struct wined3d_context *context;
1535
1536     TRACE("surface %p.\n", surface);
1537
1538     if (resource->pool == WINED3DPOOL_DEFAULT)
1539     {
1540         /* Default pool resources are supposed to be destroyed before Reset is called.
1541          * Implicit resources stay however. So this means we have an implicit render target
1542          * or depth stencil. The content may be destroyed, but we still have to tear down
1543          * opengl resources, so we cannot leave early.
1544          *
1545          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1546          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1547          * or the depth stencil into an FBO the texture or render buffer will be removed
1548          * and all flags get lost
1549          */
1550         surface_init_sysmem(surface);
1551     }
1552     else
1553     {
1554         /* Load the surface into system memory */
1555         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1556         surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
1557     }
1558     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1559     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1560     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1561
1562     context = context_acquire(device, NULL);
1563     gl_info = context->gl_info;
1564
1565     /* Destroy PBOs, but load them into real sysmem before */
1566     if (surface->flags & SFLAG_PBO)
1567         surface_remove_pbo(surface, gl_info);
1568
1569     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1570      * all application-created targets the application has to release the surface
1571      * before calling _Reset
1572      */
1573     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1574     {
1575         ENTER_GL();
1576         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1577         LEAVE_GL();
1578         list_remove(&entry->entry);
1579         HeapFree(GetProcessHeap(), 0, entry);
1580     }
1581     list_init(&surface->renderbuffers);
1582     surface->current_renderbuffer = NULL;
1583
1584     /* If we're in a texture, the texture name belongs to the texture.
1585      * Otherwise, destroy it. */
1586     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1587     {
1588         ENTER_GL();
1589         glDeleteTextures(1, &surface->texture_name);
1590         surface->texture_name = 0;
1591         glDeleteTextures(1, &surface->texture_name_srgb);
1592         surface->texture_name_srgb = 0;
1593         LEAVE_GL();
1594     }
1595
1596     context_release(context);
1597
1598     resource_unload(resource);
1599 }
1600
1601 static const struct wined3d_resource_ops surface_resource_ops =
1602 {
1603     surface_unload,
1604 };
1605
1606 static const struct wined3d_surface_ops surface_ops =
1607 {
1608     surface_private_setup,
1609     surface_cleanup,
1610     surface_realize_palette,
1611     surface_draw_overlay,
1612     surface_preload,
1613     surface_map,
1614     surface_unmap,
1615     surface_getdc,
1616     surface_flip,
1617     surface_blt,
1618     surface_bltfast,
1619     surface_set_mem,
1620 };
1621
1622 /*****************************************************************************
1623  * Initializes the GDI surface, aka creates the DIB section we render to
1624  * The DIB section creation is done by calling GetDC, which will create the
1625  * section and releasing the dc to allow the app to use it. The dib section
1626  * will stay until the surface is released
1627  *
1628  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1629  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1630  * avoid confusion in the shared surface code.
1631  *
1632  * Returns:
1633  *  WINED3D_OK on success
1634  *  The return values of called methods on failure
1635  *
1636  *****************************************************************************/
1637 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1638 {
1639     HRESULT hr;
1640
1641     TRACE("surface %p.\n", surface);
1642
1643     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1644     {
1645         ERR("Overlays not yet supported by GDI surfaces.\n");
1646         return WINED3DERR_INVALIDCALL;
1647     }
1648
1649     /* Sysmem textures have memory already allocated - release it,
1650      * this avoids an unnecessary memcpy. */
1651     hr = surface_create_dib_section(surface);
1652     if (SUCCEEDED(hr))
1653     {
1654         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1655         surface->resource.heapMemory = NULL;
1656         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1657     }
1658
1659     /* We don't mind the nonpow2 stuff in GDI. */
1660     surface->pow2Width = surface->resource.width;
1661     surface->pow2Height = surface->resource.height;
1662
1663     return WINED3D_OK;
1664 }
1665
1666 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1667 {
1668     TRACE("surface %p.\n", surface);
1669
1670     if (surface->flags & SFLAG_DIBSECTION)
1671     {
1672         /* Release the DC. */
1673         SelectObject(surface->hDC, surface->dib.holdbitmap);
1674         DeleteDC(surface->hDC);
1675         /* Release the DIB section. */
1676         DeleteObject(surface->dib.DIBsection);
1677         surface->dib.bitmap_data = NULL;
1678         surface->resource.allocatedMemory = NULL;
1679     }
1680
1681     if (surface->flags & SFLAG_USERPTR)
1682         wined3d_surface_set_mem(surface, NULL);
1683     if (surface->overlay_dest)
1684         list_remove(&surface->overlay_entry);
1685
1686     HeapFree(GetProcessHeap(), 0, surface->palette9);
1687
1688     resource_cleanup(&surface->resource);
1689 }
1690
1691 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1692 {
1693     struct wined3d_palette *palette = surface->palette;
1694
1695     TRACE("surface %p.\n", surface);
1696
1697     if (!palette) return;
1698
1699     if (surface->flags & SFLAG_DIBSECTION)
1700     {
1701         RGBQUAD col[256];
1702         unsigned int i;
1703
1704         TRACE("Updating the DC's palette.\n");
1705
1706         for (i = 0; i < 256; ++i)
1707         {
1708             col[i].rgbRed = palette->palents[i].peRed;
1709             col[i].rgbGreen = palette->palents[i].peGreen;
1710             col[i].rgbBlue = palette->palents[i].peBlue;
1711             col[i].rgbReserved = 0;
1712         }
1713         SetDIBColorTable(surface->hDC, 0, 256, col);
1714     }
1715
1716     /* Update the image because of the palette change. Some games like e.g.
1717      * Red Alert call SetEntries a lot to implement fading. */
1718     /* Tell the swapchain to update the screen. */
1719     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1720     {
1721         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1722         if (surface == swapchain->front_buffer)
1723         {
1724             x11_copy_to_screen(swapchain, NULL);
1725         }
1726     }
1727 }
1728
1729 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
1730 {
1731     FIXME("GDI surfaces can't draw overlays yet.\n");
1732     return E_FAIL;
1733 }
1734
/* Preloading is not supported for GDI surfaces; this only logs the request
 * and an error. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);
    ERR("Preloading GDI surfaces is not supported.\n");
}
1741
1742 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1743 {
1744     TRACE("surface %p, rect %s, flags %#x.\n",
1745             surface, wine_dbgstr_rect(rect), flags);
1746
1747     if (!surface->resource.allocatedMemory)
1748     {
1749         /* This happens on gdi surfaces if the application set a user pointer
1750          * and resets it. Recreate the DIB section. */
1751         surface_create_dib_section(surface);
1752         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1753     }
1754 }
1755
1756 static void gdi_surface_unmap(struct wined3d_surface *surface)
1757 {
1758     TRACE("surface %p.\n", surface);
1759
1760     /* Tell the swapchain to update the screen. */
1761     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1762     {
1763         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1764         if (surface == swapchain->front_buffer)
1765         {
1766             x11_copy_to_screen(swapchain, &surface->lockedRect);
1767         }
1768     }
1769
1770     memset(&surface->lockedRect, 0, sizeof(RECT));
1771 }
1772
1773 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
1774 {
1775     WINED3DLOCKED_RECT lock;
1776     HRESULT hr;
1777
1778     TRACE("surface %p.\n", surface);
1779
1780     /* Should have a DIB section already. */
1781     if (!(surface->flags & SFLAG_DIBSECTION))
1782     {
1783         WARN("DC not supported on this surface\n");
1784         return WINED3DERR_INVALIDCALL;
1785     }
1786
1787     /* Map the surface. */
1788     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1789     if (FAILED(hr))
1790         ERR("Map failed, hr %#x.\n", hr);
1791
1792     return hr;
1793 }
1794
1795 static HRESULT gdi_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1796 {
1797     TRACE("surface %p, override %p.\n", surface, override);
1798
1799     return WINED3D_OK;
1800 }
1801
1802 static HRESULT gdi_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
1803         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
1804         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1805 {
1806     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1807             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
1808             flags, fx, debug_d3dtexturefiltertype(filter));
1809
1810     return surface_cpu_blt(dst_surface, dst_rect, src_surface, src_rect, flags, fx, filter);
1811 }
1812
1813 static HRESULT gdi_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1814         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
1815 {
1816     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
1817             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
1818
1819     return surface_cpu_bltfast(dst_surface, dst_x, dst_y, src_surface, src_rect, trans);
1820 }
1821
1822 static HRESULT gdi_surface_set_mem(struct wined3d_surface *surface, void *mem)
1823 {
1824     TRACE("surface %p, mem %p.\n", surface, mem);
1825
1826     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
1827     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1828     {
1829         ERR("Not supported on render targets.\n");
1830         return WINED3DERR_INVALIDCALL;
1831     }
1832
1833     if (mem && mem != surface->resource.allocatedMemory)
1834     {
1835         void *release = NULL;
1836
1837         /* Do I have to copy the old surface content? */
1838         if (surface->flags & SFLAG_DIBSECTION)
1839         {
1840             SelectObject(surface->hDC, surface->dib.holdbitmap);
1841             DeleteDC(surface->hDC);
1842             /* Release the DIB section. */
1843             DeleteObject(surface->dib.DIBsection);
1844             surface->dib.bitmap_data = NULL;
1845             surface->resource.allocatedMemory = NULL;
1846             surface->hDC = NULL;
1847             surface->flags &= ~SFLAG_DIBSECTION;
1848         }
1849         else if (!(surface->flags & SFLAG_USERPTR))
1850         {
1851             release = surface->resource.allocatedMemory;
1852         }
1853         surface->resource.allocatedMemory = mem;
1854         surface->flags |= SFLAG_USERPTR | SFLAG_INSYSMEM;
1855
1856         /* Now free the old memory, if any. */
1857         HeapFree(GetProcessHeap(), 0, release);
1858     }
1859     else if (surface->flags & SFLAG_USERPTR)
1860     {
1861         /* Map() and GetDC() will re-create the dib section and allocated memory. */
1862         surface->resource.allocatedMemory = NULL;
1863         surface->flags &= ~SFLAG_USERPTR;
1864     }
1865
1866     return WINED3D_OK;
1867 }
1868
1869 static const struct wined3d_surface_ops gdi_surface_ops =
1870 {
1871     gdi_surface_private_setup,
1872     surface_gdi_cleanup,
1873     gdi_surface_realize_palette,
1874     gdi_surface_draw_overlay,
1875     gdi_surface_preload,
1876     gdi_surface_map,
1877     gdi_surface_unmap,
1878     gdi_surface_getdc,
1879     gdi_surface_flip,
1880     gdi_surface_blt,
1881     gdi_surface_bltfast,
1882     gdi_surface_set_mem,
1883 };
1884
1885 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
1886 {
1887     GLuint *name;
1888     DWORD flag;
1889
1890     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
1891
1892     if(srgb)
1893     {
1894         name = &surface->texture_name_srgb;
1895         flag = SFLAG_INSRGBTEX;
1896     }
1897     else
1898     {
1899         name = &surface->texture_name;
1900         flag = SFLAG_INTEXTURE;
1901     }
1902
1903     if (!*name && new_name)
1904     {
1905         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
1906          * surface has no texture name yet. See if we can get rid of this. */
1907         if (surface->flags & flag)
1908             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
1909         surface_modify_location(surface, flag, FALSE);
1910     }
1911
1912     *name = new_name;
1913     surface_force_reload(surface);
1914 }
1915
1916 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
1917 {
1918     TRACE("surface %p, target %#x.\n", surface, target);
1919
1920     if (surface->texture_target != target)
1921     {
1922         if (target == GL_TEXTURE_RECTANGLE_ARB)
1923         {
1924             surface->flags &= ~SFLAG_NORMCOORD;
1925         }
1926         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
1927         {
1928             surface->flags |= SFLAG_NORMCOORD;
1929         }
1930     }
1931     surface->texture_target = target;
1932     surface_force_reload(surface);
1933 }
1934
1935 /* Context activation is done by the caller. */
1936 void surface_bind(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
1937 {
1938     TRACE("surface %p, gl_info %p, srgb %#x.\n", surface, gl_info, srgb);
1939
1940     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
1941     {
1942         struct wined3d_texture *texture = surface->container.u.texture;
1943
1944         TRACE("Passing to container (%p).\n", texture);
1945         texture->texture_ops->texture_bind(texture, gl_info, srgb);
1946     }
1947     else
1948     {
1949         if (surface->texture_level)
1950         {
1951             ERR("Standalone surface %p is non-zero texture level %u.\n",
1952                     surface, surface->texture_level);
1953         }
1954
1955         if (srgb)
1956             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
1957
1958         ENTER_GL();
1959
1960         if (!surface->texture_name)
1961         {
1962             glGenTextures(1, &surface->texture_name);
1963             checkGLcall("glGenTextures");
1964
1965             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
1966
1967             glBindTexture(surface->texture_target, surface->texture_name);
1968             checkGLcall("glBindTexture");
1969             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
1970             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
1971             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
1972             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1973             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1974             checkGLcall("glTexParameteri");
1975         }
1976         else
1977         {
1978             glBindTexture(surface->texture_target, surface->texture_name);
1979             checkGLcall("glBindTexture");
1980         }
1981
1982         LEAVE_GL();
1983     }
1984 }
1985
1986 /* This function checks if the primary render target uses the 8bit paletted format. */
1987 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1988 {
1989     if (device->fb.render_targets && device->fb.render_targets[0])
1990     {
1991         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1992         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1993                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1994             return TRUE;
1995     }
1996     return FALSE;
1997 }
1998
1999 /* This call just downloads data, the caller is responsible for binding the
2000  * correct texture. */
2001 /* Context activation is done by the caller. */
2002 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2003 {
2004     const struct wined3d_format *format = surface->resource.format;
2005
2006     /* Only support read back of converted P8 surfaces. */
2007     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2008     {
2009         FIXME("Readback conversion not supported for format %s.\n", debug_d3dformat(format->id));
2010         return;
2011     }
2012
2013     ENTER_GL();
2014
2015     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2016     {
2017         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2018                 surface, surface->texture_level, format->glFormat, format->glType,
2019                 surface->resource.allocatedMemory);
2020
2021         if (surface->flags & SFLAG_PBO)
2022         {
2023             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2024             checkGLcall("glBindBufferARB");
2025             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2026             checkGLcall("glGetCompressedTexImageARB");
2027             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2028             checkGLcall("glBindBufferARB");
2029         }
2030         else
2031         {
2032             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2033                     surface->texture_level, surface->resource.allocatedMemory));
2034             checkGLcall("glGetCompressedTexImageARB");
2035         }
2036
2037         LEAVE_GL();
2038     }
2039     else
2040     {
2041         void *mem;
2042         GLenum gl_format = format->glFormat;
2043         GLenum gl_type = format->glType;
2044         int src_pitch = 0;
2045         int dst_pitch = 0;
2046
2047         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2048         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2049         {
2050             gl_format = GL_ALPHA;
2051             gl_type = GL_UNSIGNED_BYTE;
2052         }
2053
2054         if (surface->flags & SFLAG_NONPOW2)
2055         {
2056             unsigned char alignment = surface->resource.device->surface_alignment;
2057             src_pitch = format->byte_count * surface->pow2Width;
2058             dst_pitch = wined3d_surface_get_pitch(surface);
2059             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2060             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2061         }
2062         else
2063         {
2064             mem = surface->resource.allocatedMemory;
2065         }
2066
2067         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2068                 surface, surface->texture_level, gl_format, gl_type, mem);
2069
2070         if (surface->flags & SFLAG_PBO)
2071         {
2072             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2073             checkGLcall("glBindBufferARB");
2074
2075             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2076             checkGLcall("glGetTexImage");
2077
2078             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2079             checkGLcall("glBindBufferARB");
2080         }
2081         else
2082         {
2083             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2084             checkGLcall("glGetTexImage");
2085         }
2086         LEAVE_GL();
2087
2088         if (surface->flags & SFLAG_NONPOW2)
2089         {
2090             const BYTE *src_data;
2091             BYTE *dst_data;
2092             UINT y;
2093             /*
2094              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2095              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2096              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2097              *
2098              * We're doing this...
2099              *
2100              * instead of boxing the texture :
2101              * |<-texture width ->|  -->pow2width|   /\
2102              * |111111111111111111|              |   |
2103              * |222 Texture 222222| boxed empty  | texture height
2104              * |3333 Data 33333333|              |   |
2105              * |444444444444444444|              |   \/
2106              * -----------------------------------   |
2107              * |     boxed  empty | boxed empty  | pow2height
2108              * |                  |              |   \/
2109              * -----------------------------------
2110              *
2111              *
2112              * we're repacking the data to the expected texture width
2113              *
2114              * |<-texture width ->|  -->pow2width|   /\
2115              * |111111111111111111222222222222222|   |
2116              * |222333333333333333333444444444444| texture height
2117              * |444444                           |   |
2118              * |                                 |   \/
2119              * |                                 |   |
2120              * |            empty                | pow2height
2121              * |                                 |   \/
2122              * -----------------------------------
2123              *
2124              * == is the same as
2125              *
2126              * |<-texture width ->|    /\
2127              * |111111111111111111|
2128              * |222222222222222222|texture height
2129              * |333333333333333333|
2130              * |444444444444444444|    \/
2131              * --------------------
2132              *
2133              * this also means that any references to allocatedMemory should work with the data as if were a
2134              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2135              *
2136              * internally the texture is still stored in a boxed format so any references to textureName will
2137              * get a boxed texture with width pow2width and not a texture of width resource.width.
2138              *
2139              * Performance should not be an issue, because applications normally do not lock the surfaces when
2140              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2141              * and doesn't have to be re-read. */
2142             src_data = mem;
2143             dst_data = surface->resource.allocatedMemory;
2144             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2145             for (y = 1; y < surface->resource.height; ++y)
2146             {
2147                 /* skip the first row */
2148                 src_data += src_pitch;
2149                 dst_data += dst_pitch;
2150                 memcpy(dst_data, src_data, dst_pitch);
2151             }
2152
2153             HeapFree(GetProcessHeap(), 0, mem);
2154         }
2155     }
2156
2157     /* Surface has now been downloaded */
2158     surface->flags |= SFLAG_INSYSMEM;
2159 }
2160
2161 /* This call just uploads data, the caller is responsible for binding the
2162  * correct texture. */
2163 /* Context activation is done by the caller. */
2164 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2165         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2166         BOOL srgb, const struct wined3d_bo_address *data)
2167 {
2168     UINT update_w = src_rect->right - src_rect->left;
2169     UINT update_h = src_rect->bottom - src_rect->top;
2170
2171     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2172             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2173             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2174
2175     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2176         update_h *= format->heightscale;
2177
2178     ENTER_GL();
2179
2180     if (data->buffer_object)
2181     {
2182         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2183         checkGLcall("glBindBufferARB");
2184     }
2185
2186     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2187     {
2188         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2189         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2190         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2191         const BYTE *addr = data->addr;
2192         GLenum internal;
2193
2194         addr += (src_rect->top / format->block_height) * src_pitch;
2195         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2196
2197         if (srgb)
2198             internal = format->glGammaInternal;
2199         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2200             internal = format->rtInternal;
2201         else
2202             internal = format->glInternal;
2203
2204         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2205                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2206                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2207
2208         if (row_length == src_pitch)
2209         {
2210             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2211                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2212         }
2213         else
2214         {
2215             UINT row, y;
2216
2217             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2218              * can't use the unpack row length like below. */
2219             for (row = 0, y = dst_point->y; row < row_count; ++row)
2220             {
2221                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2222                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2223                 y += format->block_height;
2224                 addr += src_pitch;
2225             }
2226         }
2227         checkGLcall("glCompressedTexSubImage2DARB");
2228     }
2229     else
2230     {
2231         const BYTE *addr = data->addr;
2232
2233         addr += src_rect->top * src_w * format->byte_count;
2234         addr += src_rect->left * format->byte_count;
2235
2236         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2237                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2238                 update_w, update_h, format->glFormat, format->glType, addr);
2239
2240         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2241         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2242                 update_w, update_h, format->glFormat, format->glType, addr);
2243         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2244         checkGLcall("glTexSubImage2D");
2245     }
2246
2247     if (data->buffer_object)
2248     {
2249         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2250         checkGLcall("glBindBufferARB");
2251     }
2252
2253     LEAVE_GL();
2254
2255     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2256     {
2257         struct wined3d_device *device = surface->resource.device;
2258         unsigned int i;
2259
2260         for (i = 0; i < device->context_count; ++i)
2261         {
2262             context_surface_update(device->contexts[i], surface);
2263         }
2264     }
2265 }
2266
2267 /* This call just allocates the texture, the caller is responsible for binding
2268  * the correct texture. */
2269 /* Context activation is done by the caller. */
2270 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2271         const struct wined3d_format *format, BOOL srgb)
2272 {
2273     BOOL enable_client_storage = FALSE;
2274     GLsizei width = surface->pow2Width;
2275     GLsizei height = surface->pow2Height;
2276     const BYTE *mem = NULL;
2277     GLenum internal;
2278
2279     if (srgb)
2280     {
2281         internal = format->glGammaInternal;
2282     }
2283     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2284     {
2285         internal = format->rtInternal;
2286     }
2287     else
2288     {
2289         internal = format->glInternal;
2290     }
2291
2292     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2293
2294     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2295             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2296             internal, width, height, format->glFormat, format->glType);
2297
2298     ENTER_GL();
2299
2300     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2301     {
2302         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2303                 || !surface->resource.allocatedMemory)
2304         {
2305             /* In some cases we want to disable client storage.
2306              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2307              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2308              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2309              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2310              */
2311             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2312             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2313             surface->flags &= ~SFLAG_CLIENT;
2314             enable_client_storage = TRUE;
2315         }
2316         else
2317         {
2318             surface->flags |= SFLAG_CLIENT;
2319
2320             /* Point OpenGL to our allocated texture memory. Do not use
2321              * resource.allocatedMemory here because it might point into a
2322              * PBO. Instead use heapMemory, but get the alignment right. */
2323             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2324                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2325         }
2326     }
2327
2328     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2329     {
2330         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2331                 internal, width, height, 0, surface->resource.size, mem));
2332         checkGLcall("glCompressedTexImage2DARB");
2333     }
2334     else
2335     {
2336         glTexImage2D(surface->texture_target, surface->texture_level,
2337                 internal, width, height, 0, format->glFormat, format->glType, mem);
2338         checkGLcall("glTexImage2D");
2339     }
2340
2341     if(enable_client_storage) {
2342         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2343         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2344     }
2345     LEAVE_GL();
2346 }
2347
2348 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2349  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2350 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2351 /* GL locking is done by the caller */
2352 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2353 {
2354     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2355     struct wined3d_renderbuffer_entry *entry;
2356     GLuint renderbuffer = 0;
2357     unsigned int src_width, src_height;
2358     unsigned int width, height;
2359
2360     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2361     {
2362         width = rt->pow2Width;
2363         height = rt->pow2Height;
2364     }
2365     else
2366     {
2367         width = surface->pow2Width;
2368         height = surface->pow2Height;
2369     }
2370
2371     src_width = surface->pow2Width;
2372     src_height = surface->pow2Height;
2373
2374     /* A depth stencil smaller than the render target is not valid */
2375     if (width > src_width || height > src_height) return;
2376
2377     /* Remove any renderbuffer set if the sizes match */
2378     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2379             || (width == src_width && height == src_height))
2380     {
2381         surface->current_renderbuffer = NULL;
2382         return;
2383     }
2384
2385     /* Look if we've already got a renderbuffer of the correct dimensions */
2386     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2387     {
2388         if (entry->width == width && entry->height == height)
2389         {
2390             renderbuffer = entry->id;
2391             surface->current_renderbuffer = entry;
2392             break;
2393         }
2394     }
2395
2396     if (!renderbuffer)
2397     {
2398         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2399         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2400         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2401                 surface->resource.format->glInternal, width, height);
2402
2403         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2404         entry->width = width;
2405         entry->height = height;
2406         entry->id = renderbuffer;
2407         list_add_head(&surface->renderbuffers, &entry->entry);
2408
2409         surface->current_renderbuffer = entry;
2410     }
2411
2412     checkGLcall("set_compatible_renderbuffer");
2413 }
2414
2415 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2416 {
2417     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2418
2419     TRACE("surface %p.\n", surface);
2420
2421     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2422     {
2423         ERR("Surface %p is not on a swapchain.\n", surface);
2424         return GL_NONE;
2425     }
2426
2427     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2428     {
2429         if (swapchain->render_to_fbo)
2430         {
2431             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2432             return GL_COLOR_ATTACHMENT0;
2433         }
2434         TRACE("Returning GL_BACK\n");
2435         return GL_BACK;
2436     }
2437     else if (surface == swapchain->front_buffer)
2438     {
2439         TRACE("Returning GL_FRONT\n");
2440         return GL_FRONT;
2441     }
2442
2443     FIXME("Higher back buffer, returning GL_BACK\n");
2444     return GL_BACK;
2445 }
2446
2447 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2448 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2449 {
2450     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2451
2452     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2453         /* No partial locking for textures yet. */
2454         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2455
2456     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2457     if (dirty_rect)
2458     {
2459         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2460         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2461         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2462         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2463     }
2464     else
2465     {
2466         surface->dirtyRect.left = 0;
2467         surface->dirtyRect.top = 0;
2468         surface->dirtyRect.right = surface->resource.width;
2469         surface->dirtyRect.bottom = surface->resource.height;
2470     }
2471
2472     /* if the container is a texture then mark it dirty. */
2473     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2474     {
2475         TRACE("Passing to container.\n");
2476         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2477     }
2478 }
2479
2480 static BOOL surface_convert_color_to_float(struct wined3d_surface *surface,
2481         DWORD color, WINED3DCOLORVALUE *float_color)
2482 {
2483     const struct wined3d_format *format = surface->resource.format;
2484     struct wined3d_device *device = surface->resource.device;
2485
2486     switch (format->id)
2487     {
2488         case WINED3DFMT_P8_UINT:
2489             if (surface->palette)
2490             {
2491                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
2492                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
2493                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
2494             }
2495             else
2496             {
2497                 float_color->r = 0.0f;
2498                 float_color->g = 0.0f;
2499                 float_color->b = 0.0f;
2500             }
2501             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
2502             break;
2503
2504         case WINED3DFMT_B5G6R5_UNORM:
2505             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
2506             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
2507             float_color->b = (color & 0x1f) / 31.0f;
2508             float_color->a = 1.0f;
2509             break;
2510
2511         case WINED3DFMT_B8G8R8_UNORM:
2512         case WINED3DFMT_B8G8R8X8_UNORM:
2513             float_color->r = D3DCOLOR_R(color);
2514             float_color->g = D3DCOLOR_G(color);
2515             float_color->b = D3DCOLOR_B(color);
2516             float_color->a = 1.0f;
2517             break;
2518
2519         case WINED3DFMT_B8G8R8A8_UNORM:
2520             float_color->r = D3DCOLOR_R(color);
2521             float_color->g = D3DCOLOR_G(color);
2522             float_color->b = D3DCOLOR_B(color);
2523             float_color->a = D3DCOLOR_A(color);
2524             break;
2525
2526         default:
2527             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
2528             return FALSE;
2529     }
2530
2531     return TRUE;
2532 }
2533
2534 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2535 {
2536     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2537
2538     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2539
2540     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2541     {
2542         ERR("Not supported on scratch surfaces.\n");
2543         return WINED3DERR_INVALIDCALL;
2544     }
2545
2546     if (!(surface->flags & flag))
2547     {
2548         TRACE("Reloading because surface is dirty\n");
2549     }
2550     /* Reload if either the texture and sysmem have different ideas about the
2551      * color key, or the actual key values changed. */
2552     else if (!(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2553             || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2554             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2555             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2556     {
2557         TRACE("Reloading because of color keying\n");
2558         /* To perform the color key conversion we need a sysmem copy of
2559          * the surface. Make sure we have it. */
2560
2561         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2562         /* Make sure the texture is reloaded because of the color key change,
2563          * this kills performance though :( */
2564         /* TODO: This is not necessarily needed with hw palettized texture support. */
2565         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2566     }
2567     else
2568     {
2569         TRACE("surface is already in texture\n");
2570         return WINED3D_OK;
2571     }
2572
2573     /* No partial locking for textures yet. */
2574     surface_load_location(surface, flag, NULL);
2575     surface_evict_sysmem(surface);
2576
2577     return WINED3D_OK;
2578 }
2579
/* See also float_16_to_32() in wined3d_private.h */
/* Convert a 32 bit float to a 16 bit float (1 sign bit, 5 exponent bits with
 * excess 15, 10 mantissa bits), returned as its raw bit pattern.
 *
 * Zero yields 0x0000, NaN yields 0x7c01, infinities and values too large for
 * the target yield +/-INF (0x7c00 / 0xfc00). Values too small for a
 * normalized result are stored as denormals, or as zero if smaller still. */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the magnitude into [2^10, 2^11), so that its integer part is an
     * 11 bit mantissa including the leading 1, and track the scaling in exp. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding may carry out of the 11 bit mantissa (0x7ff -> 0x800).
     * Renormalize, otherwise the masking below would drop the carry and
     * produce a value half as large as intended. */
    if (mantissa == 0x800)
    {
        mantissa >>= 1;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2642
2643 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2644 {
2645     ULONG refcount;
2646
2647     TRACE("Surface %p, container %p of type %#x.\n",
2648             surface, surface->container.u.base, surface->container.type);
2649
2650     switch (surface->container.type)
2651     {
2652         case WINED3D_CONTAINER_TEXTURE:
2653             return wined3d_texture_incref(surface->container.u.texture);
2654
2655         case WINED3D_CONTAINER_SWAPCHAIN:
2656             return wined3d_swapchain_incref(surface->container.u.swapchain);
2657
2658         default:
2659             ERR("Unhandled container type %#x.\n", surface->container.type);
2660         case WINED3D_CONTAINER_NONE:
2661             break;
2662     }
2663
2664     refcount = InterlockedIncrement(&surface->resource.ref);
2665     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2666
2667     return refcount;
2668 }
2669
2670 /* Do not call while under the GL lock. */
2671 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2672 {
2673     ULONG refcount;
2674
2675     TRACE("Surface %p, container %p of type %#x.\n",
2676             surface, surface->container.u.base, surface->container.type);
2677
2678     switch (surface->container.type)
2679     {
2680         case WINED3D_CONTAINER_TEXTURE:
2681             return wined3d_texture_decref(surface->container.u.texture);
2682
2683         case WINED3D_CONTAINER_SWAPCHAIN:
2684             return wined3d_swapchain_decref(surface->container.u.swapchain);
2685
2686         default:
2687             ERR("Unhandled container type %#x.\n", surface->container.type);
2688         case WINED3D_CONTAINER_NONE:
2689             break;
2690     }
2691
2692     refcount = InterlockedDecrement(&surface->resource.ref);
2693     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2694
2695     if (!refcount)
2696     {
2697         surface->surface_ops->surface_cleanup(surface);
2698         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2699
2700         TRACE("Destroyed surface %p.\n", surface);
2701         HeapFree(GetProcessHeap(), 0, surface);
2702     }
2703
2704     return refcount;
2705 }
2706
2707 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2708 {
2709     return resource_set_priority(&surface->resource, priority);
2710 }
2711
2712 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2713 {
2714     return resource_get_priority(&surface->resource);
2715 }
2716
2717 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2718 {
2719     TRACE("surface %p.\n", surface);
2720
2721     surface->surface_ops->surface_preload(surface);
2722 }
2723
2724 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2725 {
2726     TRACE("surface %p.\n", surface);
2727
2728     return surface->resource.parent;
2729 }
2730
2731 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2732 {
2733     TRACE("surface %p.\n", surface);
2734
2735     return &surface->resource;
2736 }
2737
2738 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2739 {
2740     TRACE("surface %p, flags %#x.\n", surface, flags);
2741
2742     switch (flags)
2743     {
2744         case WINEDDGBS_CANBLT:
2745         case WINEDDGBS_ISBLTDONE:
2746             return WINED3D_OK;
2747
2748         default:
2749             return WINED3DERR_INVALIDCALL;
2750     }
2751 }
2752
2753 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2754 {
2755     TRACE("surface %p, flags %#x.\n", surface, flags);
2756
2757     /* XXX: DDERR_INVALIDSURFACETYPE */
2758
2759     switch (flags)
2760     {
2761         case WINEDDGFS_CANFLIP:
2762         case WINEDDGFS_ISFLIPDONE:
2763             return WINED3D_OK;
2764
2765         default:
2766             return WINED3DERR_INVALIDCALL;
2767     }
2768 }
2769
2770 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2771 {
2772     TRACE("surface %p.\n", surface);
2773
2774     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2775     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2776 }
2777
2778 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2779 {
2780     TRACE("surface %p.\n", surface);
2781
2782     /* So far we don't lose anything :) */
2783     surface->flags &= ~SFLAG_LOST;
2784     return WINED3D_OK;
2785 }
2786
2787 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2788 {
2789     TRACE("surface %p, palette %p.\n", surface, palette);
2790
2791     if (surface->palette == palette)
2792     {
2793         TRACE("Nop palette change.\n");
2794         return WINED3D_OK;
2795     }
2796
2797     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2798         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2799
2800     surface->palette = palette;
2801
2802     if (palette)
2803     {
2804         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2805             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2806
2807         surface->surface_ops->surface_realize_palette(surface);
2808     }
2809
2810     return WINED3D_OK;
2811 }
2812
2813 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2814         DWORD flags, const WINEDDCOLORKEY *color_key)
2815 {
2816     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
2817
2818     if (flags & WINEDDCKEY_COLORSPACE)
2819     {
2820         FIXME(" colorkey value not supported (%08x) !\n", flags);
2821         return WINED3DERR_INVALIDCALL;
2822     }
2823
2824     /* Dirtify the surface, but only if a key was changed. */
2825     if (color_key)
2826     {
2827         switch (flags & ~WINEDDCKEY_COLORSPACE)
2828         {
2829             case WINEDDCKEY_DESTBLT:
2830                 surface->DestBltCKey = *color_key;
2831                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
2832                 break;
2833
2834             case WINEDDCKEY_DESTOVERLAY:
2835                 surface->DestOverlayCKey = *color_key;
2836                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
2837                 break;
2838
2839             case WINEDDCKEY_SRCOVERLAY:
2840                 surface->SrcOverlayCKey = *color_key;
2841                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
2842                 break;
2843
2844             case WINEDDCKEY_SRCBLT:
2845                 surface->SrcBltCKey = *color_key;
2846                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
2847                 break;
2848         }
2849     }
2850     else
2851     {
2852         switch (flags & ~WINEDDCKEY_COLORSPACE)
2853         {
2854             case WINEDDCKEY_DESTBLT:
2855                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
2856                 break;
2857
2858             case WINEDDCKEY_DESTOVERLAY:
2859                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
2860                 break;
2861
2862             case WINEDDCKEY_SRCOVERLAY:
2863                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
2864                 break;
2865
2866             case WINEDDCKEY_SRCBLT:
2867                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
2868                 break;
2869         }
2870     }
2871
2872     return WINED3D_OK;
2873 }
2874
2875 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
2876 {
2877     TRACE("surface %p.\n", surface);
2878
2879     return surface->palette;
2880 }
2881
2882 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
2883 {
2884     const struct wined3d_format *format = surface->resource.format;
2885     DWORD pitch;
2886
2887     TRACE("surface %p.\n", surface);
2888
2889     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
2890     {
2891         /* Since compressed formats are block based, pitch means the amount of
2892          * bytes to the next row of block rather than the next row of pixels. */
2893         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
2894         pitch = row_block_count * format->block_byte_count;
2895     }
2896     else
2897     {
2898         unsigned char alignment = surface->resource.device->surface_alignment;
2899         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
2900         pitch = (pitch + alignment - 1) & ~(alignment - 1);
2901     }
2902
2903     TRACE("Returning %u.\n", pitch);
2904
2905     return pitch;
2906 }
2907
2908 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
2909 {
2910     TRACE("surface %p, mem %p.\n", surface, mem);
2911
2912     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
2913     {
2914         WARN("Surface is locked or the DC is in use.\n");
2915         return WINED3DERR_INVALIDCALL;
2916     }
2917
2918     return surface->surface_ops->surface_set_mem(surface, mem);
2919 }
2920
2921 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
2922 {
2923     LONG w, h;
2924
2925     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
2926
2927     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2928     {
2929         WARN("Not an overlay surface.\n");
2930         return WINEDDERR_NOTAOVERLAYSURFACE;
2931     }
2932
2933     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
2934     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
2935     surface->overlay_destrect.left = x;
2936     surface->overlay_destrect.top = y;
2937     surface->overlay_destrect.right = x + w;
2938     surface->overlay_destrect.bottom = y + h;
2939
2940     surface->surface_ops->surface_draw_overlay(surface);
2941
2942     return WINED3D_OK;
2943 }
2944
2945 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
2946 {
2947     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
2948
2949     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2950     {
2951         TRACE("Not an overlay surface.\n");
2952         return WINEDDERR_NOTAOVERLAYSURFACE;
2953     }
2954
2955     if (!surface->overlay_dest)
2956     {
2957         TRACE("Overlay not visible.\n");
2958         *x = 0;
2959         *y = 0;
2960         return WINEDDERR_OVERLAYNOTVISIBLE;
2961     }
2962
2963     *x = surface->overlay_destrect.left;
2964     *y = surface->overlay_destrect.top;
2965
2966     TRACE("Returning position %d, %d.\n", *x, *y);
2967
2968     return WINED3D_OK;
2969 }
2970
2971 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
2972         DWORD flags, struct wined3d_surface *ref)
2973 {
2974     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
2975
2976     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2977     {
2978         TRACE("Not an overlay surface.\n");
2979         return WINEDDERR_NOTAOVERLAYSURFACE;
2980     }
2981
2982     return WINED3D_OK;
2983 }
2984
2985 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
2986         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
2987 {
2988     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
2989             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
2990
2991     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2992     {
2993         WARN("Not an overlay surface.\n");
2994         return WINEDDERR_NOTAOVERLAYSURFACE;
2995     }
2996     else if (!dst_surface)
2997     {
2998         WARN("Dest surface is NULL.\n");
2999         return WINED3DERR_INVALIDCALL;
3000     }
3001
3002     if (src_rect)
3003     {
3004         surface->overlay_srcrect = *src_rect;
3005     }
3006     else
3007     {
3008         surface->overlay_srcrect.left = 0;
3009         surface->overlay_srcrect.top = 0;
3010         surface->overlay_srcrect.right = surface->resource.width;
3011         surface->overlay_srcrect.bottom = surface->resource.height;
3012     }
3013
3014     if (dst_rect)
3015     {
3016         surface->overlay_destrect = *dst_rect;
3017     }
3018     else
3019     {
3020         surface->overlay_destrect.left = 0;
3021         surface->overlay_destrect.top = 0;
3022         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3023         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3024     }
3025
3026     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3027     {
3028         list_remove(&surface->overlay_entry);
3029     }
3030
3031     if (flags & WINEDDOVER_SHOW)
3032     {
3033         if (surface->overlay_dest != dst_surface)
3034         {
3035             surface->overlay_dest = dst_surface;
3036             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3037         }
3038     }
3039     else if (flags & WINEDDOVER_HIDE)
3040     {
3041         /* tests show that the rectangles are erased on hide */
3042         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3043         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3044         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3045         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3046         surface->overlay_dest = NULL;
3047     }
3048
3049     surface->surface_ops->surface_draw_overlay(surface);
3050
3051     return WINED3D_OK;
3052 }
3053
3054 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3055 {
3056     TRACE("surface %p, clipper %p.\n", surface, clipper);
3057
3058     surface->clipper = clipper;
3059
3060     return WINED3D_OK;
3061 }
3062
3063 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3064 {
3065     TRACE("surface %p.\n", surface);
3066
3067     return surface->clipper;
3068 }
3069
3070 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3071 {
3072     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3073
3074     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3075
3076     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3077     {
3078         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3079         return WINED3DERR_INVALIDCALL;
3080     }
3081
3082     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3083             surface->pow2Width, surface->pow2Height);
3084     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3085     surface->resource.format = format;
3086
3087     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3088     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3089             format->glFormat, format->glInternal, format->glType);
3090
3091     return WINED3D_OK;
3092 }
3093
3094 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3095         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3096 {
3097     unsigned short *dst_s;
3098     const float *src_f;
3099     unsigned int x, y;
3100
3101     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3102
3103     for (y = 0; y < h; ++y)
3104     {
3105         src_f = (const float *)(src + y * pitch_in);
3106         dst_s = (unsigned short *) (dst + y * pitch_out);
3107         for (x = 0; x < w; ++x)
3108         {
3109             dst_s[x] = float_32_to_16(src_f + x);
3110         }
3111     }
3112 }
3113
3114 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3115         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3116 {
3117     static const unsigned char convert_5to8[] =
3118     {
3119         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3120         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3121         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3122         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3123     };
3124     static const unsigned char convert_6to8[] =
3125     {
3126         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3127         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3128         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3129         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3130         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3131         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3132         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3133         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3134     };
3135     unsigned int x, y;
3136
3137     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3138
3139     for (y = 0; y < h; ++y)
3140     {
3141         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3142         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3143         for (x = 0; x < w; ++x)
3144         {
3145             WORD pixel = src_line[x];
3146             dst_line[x] = 0xff000000
3147                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3148                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3149                     | convert_5to8[(pixel & 0x001f)];
3150         }
3151     }
3152 }
3153
3154 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3155         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3156 {
3157     unsigned int x, y;
3158
3159     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3160
3161     for (y = 0; y < h; ++y)
3162     {
3163         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3164         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3165
3166         for (x = 0; x < w; ++x)
3167         {
3168             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3169         }
3170     }
3171 }
3172
3173 static inline BYTE cliptobyte(int x)
3174 {
3175     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3176 }
3177
3178 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3179         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3180 {
3181     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3182     unsigned int x, y;
3183
3184     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3185
3186     for (y = 0; y < h; ++y)
3187     {
3188         const BYTE *src_line = src + y * pitch_in;
3189         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3190         for (x = 0; x < w; ++x)
3191         {
3192             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3193              *     C = Y - 16; D = U - 128; E = V - 128;
3194              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3195              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3196              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3197              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3198              * U and V are shared between the pixels. */
3199             if (!(x & 1)) /* For every even pixel, read new U and V. */
3200             {
3201                 d = (int) src_line[1] - 128;
3202                 e = (int) src_line[3] - 128;
3203                 r2 = 409 * e + 128;
3204                 g2 = - 100 * d - 208 * e + 128;
3205                 b2 = 516 * d + 128;
3206             }
3207             c2 = 298 * ((int) src_line[0] - 16);
3208             dst_line[x] = 0xff000000
3209                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3210                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3211                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3212                 /* Scale RGB values to 0..255 range,
3213                  * then clip them if still not in range (may be negative),
3214                  * then shift them within DWORD if necessary. */
3215             src_line += 2;
3216         }
3217     }
3218 }
3219
3220 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3221         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3222 {
3223     unsigned int x, y;
3224     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3225
3226     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3227
3228     for (y = 0; y < h; ++y)
3229     {
3230         const BYTE *src_line = src + y * pitch_in;
3231         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3232         for (x = 0; x < w; ++x)
3233         {
3234             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3235              *     C = Y - 16; D = U - 128; E = V - 128;
3236              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3237              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3238              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3239              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3240              * U and V are shared between the pixels. */
3241             if (!(x & 1)) /* For every even pixel, read new U and V. */
3242             {
3243                 d = (int) src_line[1] - 128;
3244                 e = (int) src_line[3] - 128;
3245                 r2 = 409 * e + 128;
3246                 g2 = - 100 * d - 208 * e + 128;
3247                 b2 = 516 * d + 128;
3248             }
3249             c2 = 298 * ((int) src_line[0] - 16);
3250             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3251                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3252                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3253                 /* Scale RGB values to 0..255 range,
3254                  * then clip them if still not in range (may be negative),
3255                  * then shift them within DWORD if necessary. */
3256             src_line += 2;
3257         }
3258     }
3259 }
3260
3261 struct d3dfmt_convertor_desc
3262 {
3263     enum wined3d_format_id from, to;
3264     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3265 };
3266
3267 static const struct d3dfmt_convertor_desc convertors[] =
3268 {
3269     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3270     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3271     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3272     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3273     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3274 };
3275
3276 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3277         enum wined3d_format_id to)
3278 {
3279     unsigned int i;
3280
3281     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3282     {
3283         if (convertors[i].from == from && convertors[i].to == to)
3284             return &convertors[i];
3285     }
3286
3287     return NULL;
3288 }
3289
3290 /*****************************************************************************
3291  * surface_convert_format
3292  *
3293  * Creates a duplicate of a surface in a different format. Is used by Blt to
3294  * blit between surfaces with different formats.
3295  *
3296  * Parameters
3297  *  source: Source surface
3298  *  fmt: Requested destination format
3299  *
3300  *****************************************************************************/
3301 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3302 {
3303     const struct d3dfmt_convertor_desc *conv;
3304     WINED3DLOCKED_RECT lock_src, lock_dst;
3305     struct wined3d_surface *ret = NULL;
3306     HRESULT hr;
3307
3308     conv = find_convertor(source->resource.format->id, to_fmt);
3309     if (!conv)
3310     {
3311         FIXME("Cannot find a conversion function from format %s to %s.\n",
3312                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3313         return NULL;
3314     }
3315
3316     wined3d_surface_create(source->resource.device, source->resource.width,
3317             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3318             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3319             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3320     if (!ret)
3321     {
3322         ERR("Failed to create a destination surface for conversion.\n");
3323         return NULL;
3324     }
3325
3326     memset(&lock_src, 0, sizeof(lock_src));
3327     memset(&lock_dst, 0, sizeof(lock_dst));
3328
3329     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3330     if (FAILED(hr))
3331     {
3332         ERR("Failed to lock the source surface.\n");
3333         wined3d_surface_decref(ret);
3334         return NULL;
3335     }
3336     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3337     if (FAILED(hr))
3338     {
3339         ERR("Failed to lock the destination surface.\n");
3340         wined3d_surface_unmap(source);
3341         wined3d_surface_decref(ret);
3342         return NULL;
3343     }
3344
3345     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3346             source->resource.width, source->resource.height);
3347
3348     wined3d_surface_unmap(ret);
3349     wined3d_surface_unmap(source);
3350
3351     return ret;
3352 }
3353
3354 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3355         unsigned int bpp, UINT pitch, DWORD color)
3356 {
3357     BYTE *first;
3358     int x, y;
3359
3360     /* Do first row */
3361
3362 #define COLORFILL_ROW(type) \
3363 do { \
3364     type *d = (type *)buf; \
3365     for (x = 0; x < width; ++x) \
3366         d[x] = (type)color; \
3367 } while(0)
3368
3369     switch (bpp)
3370     {
3371         case 1:
3372             COLORFILL_ROW(BYTE);
3373             break;
3374
3375         case 2:
3376             COLORFILL_ROW(WORD);
3377             break;
3378
3379         case 3:
3380         {
3381             BYTE *d = buf;
3382             for (x = 0; x < width; ++x, d += 3)
3383             {
3384                 d[0] = (color      ) & 0xFF;
3385                 d[1] = (color >>  8) & 0xFF;
3386                 d[2] = (color >> 16) & 0xFF;
3387             }
3388             break;
3389         }
3390         case 4:
3391             COLORFILL_ROW(DWORD);
3392             break;
3393
3394         default:
3395             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3396             return WINED3DERR_NOTAVAILABLE;
3397     }
3398
3399 #undef COLORFILL_ROW
3400
3401     /* Now copy first row. */
3402     first = buf;
3403     for (y = 1; y < height; ++y)
3404     {
3405         buf += pitch;
3406         memcpy(buf, first, width * bpp);
3407     }
3408
3409     return WINED3D_OK;
3410 }
3411
3412 /* Do not call while under the GL lock. */
3413 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
3414         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
3415         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
3416 {
3417     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
3418             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
3419             flags, fx, debug_d3dtexturefiltertype(filter));
3420
3421     return dst_surface->surface_ops->surface_blt(dst_surface,
3422             dst_rect, src_surface, src_rect, flags, fx, filter);
3423 }
3424
3425 /* Do not call while under the GL lock. */
3426 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
3427         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
3428 {
3429     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, trans %#x.\n",
3430             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
3431
3432     return dst_surface->surface_ops->surface_bltfast(dst_surface,
3433             dst_x, dst_y, src_surface, src_rect, trans);
3434 }
3435
3436 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3437 {
3438     TRACE("surface %p.\n", surface);
3439
3440     if (!(surface->flags & SFLAG_LOCKED))
3441     {
3442         WARN("Trying to unmap unmapped surface.\n");
3443         return WINEDDERR_NOTLOCKED;
3444     }
3445     surface->flags &= ~SFLAG_LOCKED;
3446
3447     surface->surface_ops->surface_unmap(surface);
3448
3449     return WINED3D_OK;
3450 }
3451
3452 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3453         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3454 {
3455     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3456             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3457
3458     if (surface->flags & SFLAG_LOCKED)
3459     {
3460         WARN("Surface is already mapped.\n");
3461         return WINED3DERR_INVALIDCALL;
3462     }
3463     surface->flags |= SFLAG_LOCKED;
3464
3465     if (!(surface->flags & SFLAG_LOCKABLE))
3466         WARN("Trying to lock unlockable surface.\n");
3467
3468     surface->surface_ops->surface_map(surface, rect, flags);
3469
3470     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3471
3472     if (!rect)
3473     {
3474         locked_rect->pBits = surface->resource.allocatedMemory;
3475         surface->lockedRect.left = 0;
3476         surface->lockedRect.top = 0;
3477         surface->lockedRect.right = surface->resource.width;
3478         surface->lockedRect.bottom = surface->resource.height;
3479     }
3480     else
3481     {
3482         const struct wined3d_format *format = surface->resource.format;
3483
3484         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3485         {
3486             /* Compressed textures are block based, so calculate the offset of
3487              * the block that contains the top-left pixel of the locked rectangle. */
3488             locked_rect->pBits = surface->resource.allocatedMemory
3489                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3490                     + ((rect->left / format->block_width) * format->block_byte_count);
3491         }
3492         else
3493         {
3494             locked_rect->pBits = surface->resource.allocatedMemory
3495                     + (locked_rect->Pitch * rect->top)
3496                     + (rect->left * format->byte_count);
3497         }
3498         surface->lockedRect.left = rect->left;
3499         surface->lockedRect.top = rect->top;
3500         surface->lockedRect.right = rect->right;
3501         surface->lockedRect.bottom = rect->bottom;
3502     }
3503
3504     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3505     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3506
3507     return WINED3D_OK;
3508 }
3509
3510 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3511 {
3512     HRESULT hr;
3513
3514     TRACE("surface %p, dc %p.\n", surface, dc);
3515
3516     if (surface->flags & SFLAG_USERPTR)
3517     {
3518         ERR("Not supported on surfaces with application-provided memory.\n");
3519         return WINEDDERR_NODC;
3520     }
3521
3522     /* Give more detailed info for ddraw. */
3523     if (surface->flags & SFLAG_DCINUSE)
3524         return WINEDDERR_DCALREADYCREATED;
3525
3526     /* Can't GetDC if the surface is locked. */
3527     if (surface->flags & SFLAG_LOCKED)
3528         return WINED3DERR_INVALIDCALL;
3529
3530     hr = surface->surface_ops->surface_getdc(surface);
3531     if (FAILED(hr))
3532         return hr;
3533
3534     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3535             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3536     {
3537         /* GetDC on palettized formats is unsupported in D3D9, and the method
3538          * is missing in D3D8, so this should only be used for DX <=7
3539          * surfaces (with non-device palettes). */
3540         const PALETTEENTRY *pal = NULL;
3541
3542         if (surface->palette)
3543         {
3544             pal = surface->palette->palents;
3545         }
3546         else
3547         {
3548             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3549             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3550
3551             if (dds_primary && dds_primary->palette)
3552                 pal = dds_primary->palette->palents;
3553         }
3554
3555         if (pal)
3556         {
3557             RGBQUAD col[256];
3558             unsigned int i;
3559
3560             for (i = 0; i < 256; ++i)
3561             {
3562                 col[i].rgbRed = pal[i].peRed;
3563                 col[i].rgbGreen = pal[i].peGreen;
3564                 col[i].rgbBlue = pal[i].peBlue;
3565                 col[i].rgbReserved = 0;
3566             }
3567             SetDIBColorTable(surface->hDC, 0, 256, col);
3568         }
3569     }
3570
3571     surface->flags |= SFLAG_DCINUSE;
3572
3573     *dc = surface->hDC;
3574     TRACE("Returning dc %p.\n", *dc);
3575
3576     return WINED3D_OK;
3577 }
3578
3579 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3580 {
3581     TRACE("surface %p, dc %p.\n", surface, dc);
3582
3583     if (!(surface->flags & SFLAG_DCINUSE))
3584         return WINEDDERR_NODC;
3585
3586     if (surface->hDC != dc)
3587     {
3588         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3589                 dc, surface->hDC);
3590         return WINEDDERR_NODC;
3591     }
3592
3593     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3594     {
3595         /* Copy the contents of the DIB over to the PBO. */
3596         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->dib.bitmap_size);
3597     }
3598
3599     /* We locked first, so unlock now. */
3600     wined3d_surface_unmap(surface);
3601
3602     surface->flags &= ~SFLAG_DCINUSE;
3603
3604     return WINED3D_OK;
3605 }
3606
3607 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3608 {
3609     struct wined3d_swapchain *swapchain;
3610     HRESULT hr;
3611
3612     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3613
3614     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
3615     {
3616         ERR("Flipped surface is not on a swapchain.\n");
3617         return WINEDDERR_NOTFLIPPABLE;
3618     }
3619     swapchain = surface->container.u.swapchain;
3620
3621     hr = surface->surface_ops->surface_flip(surface, override);
3622     if (FAILED(hr))
3623         return hr;
3624
3625     /* Just overwrite the swapchain presentation interval. This is ok because
3626      * only ddraw apps can call Flip, and only d3d8 and d3d9 applications
3627      * specify the presentation interval. */
3628     if (!(flags & (WINEDDFLIP_NOVSYNC | WINEDDFLIP_INTERVAL2 | WINEDDFLIP_INTERVAL3 | WINEDDFLIP_INTERVAL4)))
3629         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_ONE;
3630     else if (flags & WINEDDFLIP_NOVSYNC)
3631         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
3632     else if (flags & WINEDDFLIP_INTERVAL2)
3633         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_TWO;
3634     else if (flags & WINEDDFLIP_INTERVAL3)
3635         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_THREE;
3636     else
3637         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_FOUR;
3638
3639     return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
3640 }
3641
3642 /* Do not call while under the GL lock. */
3643 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3644 {
3645     struct wined3d_device *device = surface->resource.device;
3646
3647     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3648
3649     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3650     {
3651         struct wined3d_texture *texture = surface->container.u.texture;
3652
3653         TRACE("Passing to container (%p).\n", texture);
3654         texture->texture_ops->texture_preload(texture, srgb);
3655     }
3656     else
3657     {
3658         struct wined3d_context *context = NULL;
3659
3660         TRACE("(%p) : About to load surface\n", surface);
3661
3662         if (!device->isInDraw) context = context_acquire(device, NULL);
3663
3664         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3665                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3666         {
3667             if (palette9_changed(surface))
3668             {
3669                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3670                 /* TODO: This is not necessarily needed with hw palettized texture support */
3671                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3672                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3673                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3674             }
3675         }
3676
3677         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3678
3679         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3680         {
3681             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3682             GLclampf tmp;
3683             tmp = 0.9f;
3684             ENTER_GL();
3685             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3686             LEAVE_GL();
3687         }
3688
3689         if (context) context_release(context);
3690     }
3691 }
3692
3693 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3694 {
3695     if (!surface->resource.allocatedMemory)
3696     {
3697         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3698                 surface->resource.size + RESOURCE_ALIGNMENT);
3699         if (!surface->resource.heapMemory)
3700         {
3701             ERR("Out of memory\n");
3702             return FALSE;
3703         }
3704         surface->resource.allocatedMemory =
3705             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3706     }
3707     else
3708     {
3709         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3710     }
3711
3712     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3713
3714     return TRUE;
3715 }
3716
3717 /* Read the framebuffer back into the surface */
3718 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3719 {
3720     struct wined3d_device *device = surface->resource.device;
3721     const struct wined3d_gl_info *gl_info;
3722     struct wined3d_context *context;
3723     BYTE *mem;
3724     GLint fmt;
3725     GLint type;
3726     BYTE *row, *top, *bottom;
3727     int i;
3728     BOOL bpp;
3729     RECT local_rect;
3730     BOOL srcIsUpsideDown;
3731     GLint rowLen = 0;
3732     GLint skipPix = 0;
3733     GLint skipRow = 0;
3734
3735     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3736         static BOOL warned = FALSE;
3737         if(!warned) {
3738             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3739             warned = TRUE;
3740         }
3741         return;
3742     }
3743
3744     context = context_acquire(device, surface);
3745     context_apply_blit_state(context, device);
3746     gl_info = context->gl_info;
3747
3748     ENTER_GL();
3749
3750     /* Select the correct read buffer, and give some debug output.
3751      * There is no need to keep track of the current read buffer or reset it, every part of the code
3752      * that reads sets the read buffer as desired.
3753      */
3754     if (surface_is_offscreen(surface))
3755     {
3756         /* Mapping the primary render target which is not on a swapchain.
3757          * Read from the back buffer. */
3758         TRACE("Mapping offscreen render target.\n");
3759         glReadBuffer(device->offscreenBuffer);
3760         srcIsUpsideDown = TRUE;
3761     }
3762     else
3763     {
3764         /* Onscreen surfaces are always part of a swapchain */
3765         GLenum buffer = surface_get_gl_buffer(surface);
3766         TRACE("Mapping %#x buffer.\n", buffer);
3767         glReadBuffer(buffer);
3768         checkGLcall("glReadBuffer");
3769         srcIsUpsideDown = FALSE;
3770     }
3771
3772     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3773     if (!rect)
3774     {
3775         local_rect.left = 0;
3776         local_rect.top = 0;
3777         local_rect.right = surface->resource.width;
3778         local_rect.bottom = surface->resource.height;
3779     }
3780     else
3781     {
3782         local_rect = *rect;
3783     }
3784     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3785
3786     switch (surface->resource.format->id)
3787     {
3788         case WINED3DFMT_P8_UINT:
3789         {
3790             if (primary_render_target_is_p8(device))
3791             {
3792                 /* In case of P8 render targets the index is stored in the alpha component */
3793                 fmt = GL_ALPHA;
3794                 type = GL_UNSIGNED_BYTE;
3795                 mem = dest;
3796                 bpp = surface->resource.format->byte_count;
3797             }
3798             else
3799             {
3800                 /* GL can't return palettized data, so read ARGB pixels into a
3801                  * separate block of memory and convert them into palettized format
3802                  * in software. Slow, but if the app means to use palettized render
3803                  * targets and locks it...
3804                  *
3805                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
3806                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
3807                  * for the color channels when palettizing the colors.
3808                  */
3809                 fmt = GL_RGB;
3810                 type = GL_UNSIGNED_BYTE;
3811                 pitch *= 3;
3812                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
3813                 if (!mem)
3814                 {
3815                     ERR("Out of memory\n");
3816                     LEAVE_GL();
3817                     return;
3818                 }
3819                 bpp = surface->resource.format->byte_count * 3;
3820             }
3821         }
3822         break;
3823
3824         default:
3825             mem = dest;
3826             fmt = surface->resource.format->glFormat;
3827             type = surface->resource.format->glType;
3828             bpp = surface->resource.format->byte_count;
3829     }
3830
3831     if (surface->flags & SFLAG_PBO)
3832     {
3833         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
3834         checkGLcall("glBindBufferARB");
3835         if (mem)
3836         {
3837             ERR("mem not null for pbo -- unexpected\n");
3838             mem = NULL;
3839         }
3840     }
3841
3842     /* Save old pixel store pack state */
3843     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
3844     checkGLcall("glGetIntegerv");
3845     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
3846     checkGLcall("glGetIntegerv");
3847     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
3848     checkGLcall("glGetIntegerv");
3849
3850     /* Setup pixel store pack state -- to glReadPixels into the correct place */
3851     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
3852     checkGLcall("glPixelStorei");
3853     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
3854     checkGLcall("glPixelStorei");
3855     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
3856     checkGLcall("glPixelStorei");
3857
3858     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
3859             local_rect.right - local_rect.left,
3860             local_rect.bottom - local_rect.top,
3861             fmt, type, mem);
3862     checkGLcall("glReadPixels");
3863
3864     /* Reset previous pixel store pack state */
3865     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
3866     checkGLcall("glPixelStorei");
3867     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
3868     checkGLcall("glPixelStorei");
3869     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
3870     checkGLcall("glPixelStorei");
3871
3872     if (surface->flags & SFLAG_PBO)
3873     {
3874         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
3875         checkGLcall("glBindBufferARB");
3876
3877         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
3878          * to get a pointer to it and perform the flipping in software. This is a lot
3879          * faster than calling glReadPixels for each line. In case we want more speed
3880          * we should rerender it flipped in a FBO and read the data back from the FBO. */
3881         if (!srcIsUpsideDown)
3882         {
3883             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
3884             checkGLcall("glBindBufferARB");
3885
3886             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
3887             checkGLcall("glMapBufferARB");
3888         }
3889     }
3890
3891     /* TODO: Merge this with the palettization loop below for P8 targets */
3892     if(!srcIsUpsideDown) {
3893         UINT len, off;
3894         /* glReadPixels returns the image upside down, and there is no way to prevent this.
3895             Flip the lines in software */
3896         len = (local_rect.right - local_rect.left) * bpp;
3897         off = local_rect.left * bpp;
3898
3899         row = HeapAlloc(GetProcessHeap(), 0, len);
3900         if(!row) {
3901             ERR("Out of memory\n");
3902             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
3903                 HeapFree(GetProcessHeap(), 0, mem);
3904             LEAVE_GL();
3905             return;
3906         }
3907
3908         top = mem + pitch * local_rect.top;
3909         bottom = mem + pitch * (local_rect.bottom - 1);
3910         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
3911             memcpy(row, top + off, len);
3912             memcpy(top + off, bottom + off, len);
3913             memcpy(bottom + off, row, len);
3914             top += pitch;
3915             bottom -= pitch;
3916         }
3917         HeapFree(GetProcessHeap(), 0, row);
3918
3919         /* Unmap the temp PBO buffer */
3920         if (surface->flags & SFLAG_PBO)
3921         {
3922             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
3923             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
3924         }
3925     }
3926
3927     LEAVE_GL();
3928     context_release(context);
3929
3930     /* For P8 textures we need to perform an inverse palette lookup. This is
3931      * done by searching for a palette index which matches the RGB value.
3932      * Note this isn't guaranteed to work when there are multiple entries for
3933      * the same color but we have no choice. In case of P8 render targets,
3934      * the index is stored in the alpha component so no conversion is needed. */
3935     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
3936     {
3937         const PALETTEENTRY *pal = NULL;
3938         DWORD width = pitch / 3;
3939         int x, y, c;
3940
3941         if (surface->palette)
3942         {
3943             pal = surface->palette->palents;
3944         }
3945         else
3946         {
3947             ERR("Palette is missing, cannot perform inverse palette lookup\n");
3948             HeapFree(GetProcessHeap(), 0, mem);
3949             return;
3950         }
3951
3952         for(y = local_rect.top; y < local_rect.bottom; y++) {
3953             for(x = local_rect.left; x < local_rect.right; x++) {
3954                 /*                      start              lines            pixels      */
3955                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
3956                 const BYTE *green = blue  + 1;
3957                 const BYTE *red = green + 1;
3958
3959                 for(c = 0; c < 256; c++) {
3960                     if(*red   == pal[c].peRed   &&
3961                        *green == pal[c].peGreen &&
3962                        *blue  == pal[c].peBlue)
3963                     {
3964                         *((BYTE *) dest + y * width + x) = c;
3965                         break;
3966                     }
3967                 }
3968             }
3969         }
3970         HeapFree(GetProcessHeap(), 0, mem);
3971     }
3972 }
3973
3974 /* Read the framebuffer contents into a texture */
3975 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
3976 {
3977     struct wined3d_device *device = surface->resource.device;
3978     const struct wined3d_gl_info *gl_info;
3979     struct wined3d_context *context;
3980
3981     if (!surface_is_offscreen(surface))
3982     {
3983         /* We would need to flip onscreen surfaces, but there's no efficient
3984          * way to do that here. It makes more sense for the caller to
3985          * explicitly go through sysmem. */
3986         ERR("Not supported for onscreen targets.\n");
3987         return;
3988     }
3989
3990     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
3991      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
3992      * states in the stateblock, and no driver was found yet that had bugs in that regard.
3993      */
3994     context = context_acquire(device, surface);
3995     gl_info = context->gl_info;
3996     device_invalidate_state(device, STATE_FRAMEBUFFER);
3997
3998     surface_prepare_texture(surface, gl_info, srgb);
3999     surface_bind_and_dirtify(surface, gl_info, srgb);
4000
4001     TRACE("Reading back offscreen render target %p.\n", surface);
4002
4003     ENTER_GL();
4004
4005     glReadBuffer(device->offscreenBuffer);
4006     checkGLcall("glReadBuffer");
4007
4008     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4009             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4010     checkGLcall("glCopyTexSubImage2D");
4011
4012     LEAVE_GL();
4013
4014     context_release(context);
4015 }
4016
4017 /* Context activation is done by the caller. */
4018 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4019         const struct wined3d_gl_info *gl_info, BOOL srgb)
4020 {
4021     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4022     CONVERT_TYPES convert;
4023     struct wined3d_format format;
4024
4025     if (surface->flags & alloc_flag) return;
4026
4027     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4028     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4029     else surface->flags &= ~SFLAG_CONVERTED;
4030
4031     surface_bind_and_dirtify(surface, gl_info, srgb);
4032     surface_allocate_surface(surface, gl_info, &format, srgb);
4033     surface->flags |= alloc_flag;
4034 }
4035
4036 /* Context activation is done by the caller. */
4037 void surface_prepare_texture(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
4038 {
4039     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4040     {
4041         struct wined3d_texture *texture = surface->container.u.texture;
4042         UINT sub_count = texture->level_count * texture->layer_count;
4043         UINT i;
4044
4045         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4046
4047         for (i = 0; i < sub_count; ++i)
4048         {
4049             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4050             surface_prepare_texture_internal(s, gl_info, srgb);
4051         }
4052
4053         return;
4054     }
4055
4056     surface_prepare_texture_internal(surface, gl_info, srgb);
4057 }
4058
4059 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4060         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4061 {
4062     struct wined3d_device *device = surface->resource.device;
4063     UINT pitch = wined3d_surface_get_pitch(surface);
4064     const struct wined3d_gl_info *gl_info;
4065     struct wined3d_context *context;
4066     RECT local_rect;
4067     UINT w, h;
4068
4069     surface_get_rect(surface, rect, &local_rect);
4070
4071     mem += local_rect.top * pitch + local_rect.left * bpp;
4072     w = local_rect.right - local_rect.left;
4073     h = local_rect.bottom - local_rect.top;
4074
4075     /* Activate the correct context for the render target */
4076     context = context_acquire(device, surface);
4077     context_apply_blit_state(context, device);
4078     gl_info = context->gl_info;
4079
4080     ENTER_GL();
4081
4082     if (!surface_is_offscreen(surface))
4083     {
4084         GLenum buffer = surface_get_gl_buffer(surface);
4085         TRACE("Unlocking %#x buffer.\n", buffer);
4086         context_set_draw_buffer(context, buffer);
4087
4088         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4089         glPixelZoom(1.0f, -1.0f);
4090     }
4091     else
4092     {
4093         /* Primary offscreen render target */
4094         TRACE("Offscreen render target.\n");
4095         context_set_draw_buffer(context, device->offscreenBuffer);
4096
4097         glPixelZoom(1.0f, 1.0f);
4098     }
4099
4100     glRasterPos3i(local_rect.left, local_rect.top, 1);
4101     checkGLcall("glRasterPos3i");
4102
4103     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4104     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4105
4106     if (surface->flags & SFLAG_PBO)
4107     {
4108         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4109         checkGLcall("glBindBufferARB");
4110     }
4111
4112     glDrawPixels(w, h, fmt, type, mem);
4113     checkGLcall("glDrawPixels");
4114
4115     if (surface->flags & SFLAG_PBO)
4116     {
4117         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4118         checkGLcall("glBindBufferARB");
4119     }
4120
4121     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4122     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4123
4124     LEAVE_GL();
4125
4126     if (wined3d_settings.strict_draw_ordering
4127             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4128             && surface->container.u.swapchain->front_buffer == surface))
4129         wglFlush();
4130
4131     context_release(context);
4132 }
4133
4134 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4135         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4136 {
4137     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4138     const struct wined3d_device *device = surface->resource.device;
4139     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4140     BOOL blit_supported = FALSE;
4141
4142     /* Copy the default values from the surface. Below we might perform fixups */
4143     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4144     *format = *surface->resource.format;
4145     *convert = NO_CONVERSION;
4146
4147     /* Ok, now look if we have to do any conversion */
4148     switch (surface->resource.format->id)
4149     {
4150         case WINED3DFMT_P8_UINT:
4151             /* Below the call to blit_supported is disabled for Wine 1.2
4152              * because the function isn't operating correctly yet. At the
4153              * moment 8-bit blits are handled in software and if certain GL
4154              * extensions are around, surface conversion is performed at
4155              * upload time. The blit_supported call recognizes it as a
4156              * destination fixup. This type of upload 'fixup' and 8-bit to
4157              * 8-bit blits need to be handled by the blit_shader.
4158              * TODO: get rid of this #if 0. */
4159 #if 0
4160             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4161                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4162                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4163 #endif
4164             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4165
4166             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4167              * texturing. Further also use conversion in case of color keying.
4168              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4169              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4170              * conflicts with this.
4171              */
4172             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4173                     || colorkey_active || !use_texturing)
4174             {
4175                 format->glFormat = GL_RGBA;
4176                 format->glInternal = GL_RGBA;
4177                 format->glType = GL_UNSIGNED_BYTE;
4178                 format->conv_byte_count = 4;
4179                 if (colorkey_active)
4180                     *convert = CONVERT_PALETTED_CK;
4181                 else
4182                     *convert = CONVERT_PALETTED;
4183             }
4184             break;
4185
4186         case WINED3DFMT_B2G3R3_UNORM:
4187             /* **********************
4188                 GL_UNSIGNED_BYTE_3_3_2
4189                 ********************** */
4190             if (colorkey_active) {
4191                 /* This texture format will never be used.. So do not care about color keying
4192                     up until the point in time it will be needed :-) */
4193                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4194             }
4195             break;
4196
4197         case WINED3DFMT_B5G6R5_UNORM:
4198             if (colorkey_active)
4199             {
4200                 *convert = CONVERT_CK_565;
4201                 format->glFormat = GL_RGBA;
4202                 format->glInternal = GL_RGB5_A1;
4203                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4204                 format->conv_byte_count = 2;
4205             }
4206             break;
4207
4208         case WINED3DFMT_B5G5R5X1_UNORM:
4209             if (colorkey_active)
4210             {
4211                 *convert = CONVERT_CK_5551;
4212                 format->glFormat = GL_BGRA;
4213                 format->glInternal = GL_RGB5_A1;
4214                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4215                 format->conv_byte_count = 2;
4216             }
4217             break;
4218
4219         case WINED3DFMT_B8G8R8_UNORM:
4220             if (colorkey_active)
4221             {
4222                 *convert = CONVERT_CK_RGB24;
4223                 format->glFormat = GL_RGBA;
4224                 format->glInternal = GL_RGBA8;
4225                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4226                 format->conv_byte_count = 4;
4227             }
4228             break;
4229
4230         case WINED3DFMT_B8G8R8X8_UNORM:
4231             if (colorkey_active)
4232             {
4233                 *convert = CONVERT_RGB32_888;
4234                 format->glFormat = GL_RGBA;
4235                 format->glInternal = GL_RGBA8;
4236                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4237                 format->conv_byte_count = 4;
4238             }
4239             break;
4240
4241         default:
4242             break;
4243     }
4244
4245     return WINED3D_OK;
4246 }
4247
4248 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4249 {
4250     const struct wined3d_device *device = surface->resource.device;
4251     const struct wined3d_palette *pal = surface->palette;
4252     BOOL index_in_alpha = FALSE;
4253     unsigned int i;
4254
4255     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4256      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4257      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4258      * duplicate entries. Store the color key in the unused alpha component to speed the
4259      * download up and to make conversion unneeded. */
4260     index_in_alpha = primary_render_target_is_p8(device);
4261
4262     if (!pal)
4263     {
4264         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4265         if (device->wined3d->flags & WINED3D_PALETTE_PER_SURFACE)
4266         {
4267             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4268             if (index_in_alpha)
4269             {
4270                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4271                  * there's no palette at this time. */
4272                 for (i = 0; i < 256; i++) table[i][3] = i;
4273             }
4274         }
4275         else
4276         {
4277             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4278              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4279              * capability flag is present (wine does advertise this capability) */
4280             for (i = 0; i < 256; ++i)
4281             {
4282                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4283                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4284                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4285                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4286             }
4287         }
4288     }
4289     else
4290     {
4291         TRACE("Using surface palette %p\n", pal);
4292         /* Get the surface's palette */
4293         for (i = 0; i < 256; ++i)
4294         {
4295             table[i][0] = pal->palents[i].peRed;
4296             table[i][1] = pal->palents[i].peGreen;
4297             table[i][2] = pal->palents[i].peBlue;
4298
4299             /* When index_in_alpha is set the palette index is stored in the
4300              * alpha component. In case of a readback we can then read
4301              * GL_ALPHA. Color keying is handled in BltOverride using a
4302              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4303              * color key itself is passed to glAlphaFunc in other cases the
4304              * alpha component of pixels that should be masked away is set to 0. */
4305             if (index_in_alpha)
4306             {
4307                 table[i][3] = i;
4308             }
4309             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4310                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4311             {
4312                 table[i][3] = 0x00;
4313             }
4314             else if (pal->flags & WINEDDPCAPS_ALPHA)
4315             {
4316                 table[i][3] = pal->palents[i].peFlags;
4317             }
4318             else
4319             {
4320                 table[i][3] = 0xFF;
4321             }
4322         }
4323     }
4324 }
4325
4326 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4327         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4328 {
4329     const BYTE *source;
4330     BYTE *dest;
4331     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4332
4333     switch (convert) {
4334         case NO_CONVERSION:
4335         {
4336             memcpy(dst, src, pitch * height);
4337             break;
4338         }
4339         case CONVERT_PALETTED:
4340         case CONVERT_PALETTED_CK:
4341         {
4342             BYTE table[256][4];
4343             unsigned int x, y;
4344
4345             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4346
4347             for (y = 0; y < height; y++)
4348             {
4349                 source = src + pitch * y;
4350                 dest = dst + outpitch * y;
4351                 /* This is an 1 bpp format, using the width here is fine */
4352                 for (x = 0; x < width; x++) {
4353                     BYTE color = *source++;
4354                     *dest++ = table[color][0];
4355                     *dest++ = table[color][1];
4356                     *dest++ = table[color][2];
4357                     *dest++ = table[color][3];
4358                 }
4359             }
4360         }
4361         break;
4362
4363         case CONVERT_CK_565:
4364         {
4365             /* Converting the 565 format in 5551 packed to emulate color-keying.
4366
4367               Note : in all these conversion, it would be best to average the averaging
4368                       pixels to get the color of the pixel that will be color-keyed to
4369                       prevent 'color bleeding'. This will be done later on if ever it is
4370                       too visible.
4371
4372               Note2: Nvidia documents say that their driver does not support alpha + color keying
4373                      on the same surface and disables color keying in such a case
4374             */
4375             unsigned int x, y;
4376             const WORD *Source;
4377             WORD *Dest;
4378
4379             TRACE("Color keyed 565\n");
4380
4381             for (y = 0; y < height; y++) {
4382                 Source = (const WORD *)(src + y * pitch);
4383                 Dest = (WORD *) (dst + y * outpitch);
4384                 for (x = 0; x < width; x++ ) {
4385                     WORD color = *Source++;
4386                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4387                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4388                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4389                         *Dest |= 0x0001;
4390                     Dest++;
4391                 }
4392             }
4393         }
4394         break;
4395
4396         case CONVERT_CK_5551:
4397         {
4398             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4399             unsigned int x, y;
4400             const WORD *Source;
4401             WORD *Dest;
4402             TRACE("Color keyed 5551\n");
4403             for (y = 0; y < height; y++) {
4404                 Source = (const WORD *)(src + y * pitch);
4405                 Dest = (WORD *) (dst + y * outpitch);
4406                 for (x = 0; x < width; x++ ) {
4407                     WORD color = *Source++;
4408                     *Dest = color;
4409                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4410                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4411                         *Dest |= (1 << 15);
4412                     else
4413                         *Dest &= ~(1 << 15);
4414                     Dest++;
4415                 }
4416             }
4417         }
4418         break;
4419
4420         case CONVERT_CK_RGB24:
4421         {
4422             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4423             unsigned int x, y;
4424             for (y = 0; y < height; y++)
4425             {
4426                 source = src + pitch * y;
4427                 dest = dst + outpitch * y;
4428                 for (x = 0; x < width; x++) {
4429                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4430                     DWORD dstcolor = color << 8;
4431                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4432                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4433                         dstcolor |= 0xff;
4434                     *(DWORD*)dest = dstcolor;
4435                     source += 3;
4436                     dest += 4;
4437                 }
4438             }
4439         }
4440         break;
4441
4442         case CONVERT_RGB32_888:
4443         {
4444             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4445             unsigned int x, y;
4446             for (y = 0; y < height; y++)
4447             {
4448                 source = src + pitch * y;
4449                 dest = dst + outpitch * y;
4450                 for (x = 0; x < width; x++) {
4451                     DWORD color = 0xffffff & *(const DWORD*)source;
4452                     DWORD dstcolor = color << 8;
4453                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4454                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4455                         dstcolor |= 0xff;
4456                     *(DWORD*)dest = dstcolor;
4457                     source += 4;
4458                     dest += 4;
4459                 }
4460             }
4461         }
4462         break;
4463
4464         default:
4465             ERR("Unsupported conversion type %#x.\n", convert);
4466     }
4467     return WINED3D_OK;
4468 }
4469
4470 BOOL palette9_changed(struct wined3d_surface *surface)
4471 {
4472     struct wined3d_device *device = surface->resource.device;
4473
4474     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4475             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4476     {
4477         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4478          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4479          */
4480         return FALSE;
4481     }
4482
4483     if (surface->palette9)
4484     {
4485         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4486         {
4487             return FALSE;
4488         }
4489     }
4490     else
4491     {
4492         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4493     }
4494     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4495
4496     return TRUE;
4497 }
4498
4499 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4500 {
4501     /* Flip the surface contents */
4502     /* Flip the DC */
4503     {
4504         HDC tmp;
4505         tmp = front->hDC;
4506         front->hDC = back->hDC;
4507         back->hDC = tmp;
4508     }
4509
4510     /* Flip the DIBsection */
4511     {
4512         HBITMAP tmp;
4513         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4514         tmp = front->dib.DIBsection;
4515         front->dib.DIBsection = back->dib.DIBsection;
4516         back->dib.DIBsection = tmp;
4517
4518         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4519         else front->flags &= ~SFLAG_DIBSECTION;
4520         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4521         else back->flags &= ~SFLAG_DIBSECTION;
4522     }
4523
4524     /* Flip the surface data */
4525     {
4526         void* tmp;
4527
4528         tmp = front->dib.bitmap_data;
4529         front->dib.bitmap_data = back->dib.bitmap_data;
4530         back->dib.bitmap_data = tmp;
4531
4532         tmp = front->resource.allocatedMemory;
4533         front->resource.allocatedMemory = back->resource.allocatedMemory;
4534         back->resource.allocatedMemory = tmp;
4535
4536         tmp = front->resource.heapMemory;
4537         front->resource.heapMemory = back->resource.heapMemory;
4538         back->resource.heapMemory = tmp;
4539     }
4540
4541     /* Flip the PBO */
4542     {
4543         GLuint tmp_pbo = front->pbo;
4544         front->pbo = back->pbo;
4545         back->pbo = tmp_pbo;
4546     }
4547
4548     /* client_memory should not be different, but just in case */
4549     {
4550         BOOL tmp;
4551         tmp = front->dib.client_memory;
4552         front->dib.client_memory = back->dib.client_memory;
4553         back->dib.client_memory = tmp;
4554     }
4555
4556     /* Flip the opengl texture */
4557     {
4558         GLuint tmp;
4559
4560         tmp = back->texture_name;
4561         back->texture_name = front->texture_name;
4562         front->texture_name = tmp;
4563
4564         tmp = back->texture_name_srgb;
4565         back->texture_name_srgb = front->texture_name_srgb;
4566         front->texture_name_srgb = tmp;
4567
4568         resource_unload(&back->resource);
4569         resource_unload(&front->resource);
4570     }
4571
4572     {
4573         DWORD tmp_flags = back->flags;
4574         back->flags = front->flags;
4575         front->flags = tmp_flags;
4576     }
4577 }
4578
4579 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4580  * pixel copy calls. */
4581 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4582         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4583 {
4584     struct wined3d_device *device = dst_surface->resource.device;
4585     float xrel, yrel;
4586     UINT row;
4587     struct wined3d_context *context;
4588     BOOL upsidedown = FALSE;
4589     RECT dst_rect = *dst_rect_in;
4590
4591     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4592      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4593      */
4594     if(dst_rect.top > dst_rect.bottom) {
4595         UINT tmp = dst_rect.bottom;
4596         dst_rect.bottom = dst_rect.top;
4597         dst_rect.top = tmp;
4598         upsidedown = TRUE;
4599     }
4600
4601     context = context_acquire(device, src_surface);
4602     context_apply_blit_state(context, device);
4603     surface_internal_preload(dst_surface, SRGB_RGB);
4604     ENTER_GL();
4605
4606     /* Bind the target texture */
4607     glBindTexture(dst_surface->texture_target, dst_surface->texture_name);
4608     checkGLcall("glBindTexture");
4609     if (surface_is_offscreen(src_surface))
4610     {
4611         TRACE("Reading from an offscreen target\n");
4612         upsidedown = !upsidedown;
4613         glReadBuffer(device->offscreenBuffer);
4614     }
4615     else
4616     {
4617         glReadBuffer(surface_get_gl_buffer(src_surface));
4618     }
4619     checkGLcall("glReadBuffer");
4620
4621     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4622     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4623
4624     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4625     {
4626         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4627
4628         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4629             ERR("Texture filtering not supported in direct blit\n");
4630         }
4631     }
4632     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4633             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4634     {
4635         ERR("Texture filtering not supported in direct blit\n");
4636     }
4637
4638     if (upsidedown
4639             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4640             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4641     {
4642         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4643
4644         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4645                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4646                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4647                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4648     }
4649     else
4650     {
4651         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4652         /* I have to process this row by row to swap the image,
4653          * otherwise it would be upside down, so stretching in y direction
4654          * doesn't cost extra time
4655          *
4656          * However, stretching in x direction can be avoided if not necessary
4657          */
4658         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4659             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4660             {
4661                 /* Well, that stuff works, but it's very slow.
4662                  * find a better way instead
4663                  */
4664                 UINT col;
4665
4666                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4667                 {
4668                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4669                             dst_rect.left + col /* x offset */, row /* y offset */,
4670                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4671                 }
4672             }
4673             else
4674             {
4675                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4676                         dst_rect.left /* x offset */, row /* y offset */,
4677                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4678             }
4679         }
4680     }
4681     checkGLcall("glCopyTexSubImage2D");
4682
4683     LEAVE_GL();
4684     context_release(context);
4685
4686     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4687      * path is never entered
4688      */
4689     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4690 }
4691
4692 /* Uses the hardware to stretch and flip the image */
4693 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4694         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4695 {
4696     struct wined3d_device *device = dst_surface->resource.device;
4697     struct wined3d_swapchain *src_swapchain = NULL;
4698     GLuint src, backup = 0;
4699     float left, right, top, bottom; /* Texture coordinates */
4700     UINT fbwidth = src_surface->resource.width;
4701     UINT fbheight = src_surface->resource.height;
4702     struct wined3d_context *context;
4703     GLenum drawBuffer = GL_BACK;
4704     GLenum texture_target;
4705     BOOL noBackBufferBackup;
4706     BOOL src_offscreen;
4707     BOOL upsidedown = FALSE;
4708     RECT dst_rect = *dst_rect_in;
4709
4710     TRACE("Using hwstretch blit\n");
4711     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4712     context = context_acquire(device, src_surface);
4713     context_apply_blit_state(context, device);
4714     surface_internal_preload(dst_surface, SRGB_RGB);
4715
4716     src_offscreen = surface_is_offscreen(src_surface);
4717     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4718     if (!noBackBufferBackup && !src_surface->texture_name)
4719     {
4720         /* Get it a description */
4721         surface_internal_preload(src_surface, SRGB_RGB);
4722     }
4723     ENTER_GL();
4724
4725     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4726      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4727      */
4728     if (context->aux_buffers >= 2)
4729     {
4730         /* Got more than one aux buffer? Use the 2nd aux buffer */
4731         drawBuffer = GL_AUX1;
4732     }
4733     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4734     {
4735         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4736         drawBuffer = GL_AUX0;
4737     }
4738
4739     if(noBackBufferBackup) {
4740         glGenTextures(1, &backup);
4741         checkGLcall("glGenTextures");
4742         glBindTexture(GL_TEXTURE_2D, backup);
4743         checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4744         texture_target = GL_TEXTURE_2D;
4745     } else {
4746         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4747          * we are reading from the back buffer, the backup can be used as source texture
4748          */
4749         texture_target = src_surface->texture_target;
4750         glBindTexture(texture_target, src_surface->texture_name);
4751         checkGLcall("glBindTexture(texture_target, src_surface->texture_name)");
4752         glEnable(texture_target);
4753         checkGLcall("glEnable(texture_target)");
4754
4755         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4756         src_surface->flags &= ~SFLAG_INTEXTURE;
4757     }
4758
4759     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4760      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4761      */
4762     if(dst_rect.top > dst_rect.bottom) {
4763         UINT tmp = dst_rect.bottom;
4764         dst_rect.bottom = dst_rect.top;
4765         dst_rect.top = tmp;
4766         upsidedown = TRUE;
4767     }
4768
4769     if (src_offscreen)
4770     {
4771         TRACE("Reading from an offscreen target\n");
4772         upsidedown = !upsidedown;
4773         glReadBuffer(device->offscreenBuffer);
4774     }
4775     else
4776     {
4777         glReadBuffer(surface_get_gl_buffer(src_surface));
4778     }
4779
4780     /* TODO: Only back up the part that will be overwritten */
4781     glCopyTexSubImage2D(texture_target, 0,
4782                         0, 0 /* read offsets */,
4783                         0, 0,
4784                         fbwidth,
4785                         fbheight);
4786
4787     checkGLcall("glCopyTexSubImage2D");
4788
4789     /* No issue with overriding these - the sampler is dirty due to blit usage */
4790     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4791             wined3d_gl_mag_filter(magLookup, Filter));
4792     checkGLcall("glTexParameteri");
4793     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
4794             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
4795     checkGLcall("glTexParameteri");
4796
4797     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4798         src_swapchain = src_surface->container.u.swapchain;
4799     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
4800     {
4801         src = backup ? backup : src_surface->texture_name;
4802     }
4803     else
4804     {
4805         glReadBuffer(GL_FRONT);
4806         checkGLcall("glReadBuffer(GL_FRONT)");
4807
4808         glGenTextures(1, &src);
4809         checkGLcall("glGenTextures(1, &src)");
4810         glBindTexture(GL_TEXTURE_2D, src);
4811         checkGLcall("glBindTexture(GL_TEXTURE_2D, src)");
4812
4813         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
4814          * out for power of 2 sizes
4815          */
4816         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
4817                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
4818         checkGLcall("glTexImage2D");
4819         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
4820                             0, 0 /* read offsets */,
4821                             0, 0,
4822                             fbwidth,
4823                             fbheight);
4824
4825         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
4826         checkGLcall("glTexParameteri");
4827         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
4828         checkGLcall("glTexParameteri");
4829
4830         glReadBuffer(GL_BACK);
4831         checkGLcall("glReadBuffer(GL_BACK)");
4832
4833         if(texture_target != GL_TEXTURE_2D) {
4834             glDisable(texture_target);
4835             glEnable(GL_TEXTURE_2D);
4836             texture_target = GL_TEXTURE_2D;
4837         }
4838     }
4839     checkGLcall("glEnd and previous");
4840
4841     left = src_rect->left;
4842     right = src_rect->right;
4843
4844     if (!upsidedown)
4845     {
4846         top = src_surface->resource.height - src_rect->top;
4847         bottom = src_surface->resource.height - src_rect->bottom;
4848     }
4849     else
4850     {
4851         top = src_surface->resource.height - src_rect->bottom;
4852         bottom = src_surface->resource.height - src_rect->top;
4853     }
4854
4855     if (src_surface->flags & SFLAG_NORMCOORD)
4856     {
4857         left /= src_surface->pow2Width;
4858         right /= src_surface->pow2Width;
4859         top /= src_surface->pow2Height;
4860         bottom /= src_surface->pow2Height;
4861     }
4862
4863     /* draw the source texture stretched and upside down. The correct surface is bound already */
4864     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
4865     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
4866
4867     context_set_draw_buffer(context, drawBuffer);
4868     glReadBuffer(drawBuffer);
4869
4870     glBegin(GL_QUADS);
4871         /* bottom left */
4872         glTexCoord2f(left, bottom);
4873         glVertex2i(0, 0);
4874
4875         /* top left */
4876         glTexCoord2f(left, top);
4877         glVertex2i(0, dst_rect.bottom - dst_rect.top);
4878
4879         /* top right */
4880         glTexCoord2f(right, top);
4881         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4882
4883         /* bottom right */
4884         glTexCoord2f(right, bottom);
4885         glVertex2i(dst_rect.right - dst_rect.left, 0);
4886     glEnd();
4887     checkGLcall("glEnd and previous");
4888
4889     if (texture_target != dst_surface->texture_target)
4890     {
4891         glDisable(texture_target);
4892         glEnable(dst_surface->texture_target);
4893         texture_target = dst_surface->texture_target;
4894     }
4895
4896     /* Now read the stretched and upside down image into the destination texture */
4897     glBindTexture(texture_target, dst_surface->texture_name);
4898     checkGLcall("glBindTexture");
4899     glCopyTexSubImage2D(texture_target,
4900                         0,
4901                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
4902                         0, 0, /* We blitted the image to the origin */
4903                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4904     checkGLcall("glCopyTexSubImage2D");
4905
4906     if(drawBuffer == GL_BACK) {
4907         /* Write the back buffer backup back */
4908         if(backup) {
4909             if(texture_target != GL_TEXTURE_2D) {
4910                 glDisable(texture_target);
4911                 glEnable(GL_TEXTURE_2D);
4912                 texture_target = GL_TEXTURE_2D;
4913             }
4914             glBindTexture(GL_TEXTURE_2D, backup);
4915             checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4916         }
4917         else
4918         {
4919             if (texture_target != src_surface->texture_target)
4920             {
4921                 glDisable(texture_target);
4922                 glEnable(src_surface->texture_target);
4923                 texture_target = src_surface->texture_target;
4924             }
4925             glBindTexture(src_surface->texture_target, src_surface->texture_name);
4926             checkGLcall("glBindTexture(src_surface->texture_target, src_surface->texture_name)");
4927         }
4928
4929         glBegin(GL_QUADS);
4930             /* top left */
4931             glTexCoord2f(0.0f, 0.0f);
4932             glVertex2i(0, fbheight);
4933
4934             /* bottom left */
4935             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
4936             glVertex2i(0, 0);
4937
4938             /* bottom right */
4939             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
4940                     (float)fbheight / (float)src_surface->pow2Height);
4941             glVertex2i(fbwidth, 0);
4942
4943             /* top right */
4944             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
4945             glVertex2i(fbwidth, fbheight);
4946         glEnd();
4947     }
4948     glDisable(texture_target);
4949     checkGLcall("glDisable(texture_target)");
4950
4951     /* Cleanup */
4952     if (src != src_surface->texture_name && src != backup)
4953     {
4954         glDeleteTextures(1, &src);
4955         checkGLcall("glDeleteTextures(1, &src)");
4956     }
4957     if(backup) {
4958         glDeleteTextures(1, &backup);
4959         checkGLcall("glDeleteTextures(1, &backup)");
4960     }
4961
4962     LEAVE_GL();
4963
4964     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
4965
4966     context_release(context);
4967
4968     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4969      * path is never entered
4970      */
4971     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4972 }
4973
4974 /* Front buffer coordinates are always full screen coordinates, but our GL
4975  * drawable is limited to the window's client area. The sysmem and texture
4976  * copies do have the full screen size. Note that GL has a bottom-left
4977  * origin, while D3D has a top-left origin. */
4978 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
4979 {
4980     UINT drawable_height;
4981
4982     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4983             && surface == surface->container.u.swapchain->front_buffer)
4984     {
4985         POINT offset = {0, 0};
4986         RECT windowsize;
4987
4988         ScreenToClient(window, &offset);
4989         OffsetRect(rect, offset.x, offset.y);
4990
4991         GetClientRect(window, &windowsize);
4992         drawable_height = windowsize.bottom - windowsize.top;
4993     }
4994     else
4995     {
4996         drawable_height = surface->resource.height;
4997     }
4998
4999     rect->top = drawable_height - rect->top;
5000     rect->bottom = drawable_height - rect->bottom;
5001 }
5002
5003 /* blit between surface locations. onscreen on different swapchains is not supported.
5004  * depth / stencil is not supported. */
5005 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
5006         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
5007         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
5008 {
5009     const struct wined3d_gl_info *gl_info;
5010     struct wined3d_context *context;
5011     RECT src_rect, dst_rect;
5012     GLenum gl_filter;
5013
5014     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
5015     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
5016             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
5017     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
5018             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
5019
5020     src_rect = *src_rect_in;
5021     dst_rect = *dst_rect_in;
5022
5023     switch (filter)
5024     {
5025         case WINED3DTEXF_LINEAR:
5026             gl_filter = GL_LINEAR;
5027             break;
5028
5029         default:
5030             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
5031         case WINED3DTEXF_NONE:
5032         case WINED3DTEXF_POINT:
5033             gl_filter = GL_NEAREST;
5034             break;
5035     }
5036
5037     if (src_location == SFLAG_INDRAWABLE && surface_is_offscreen(src_surface))
5038         src_location = SFLAG_INTEXTURE;
5039     if (dst_location == SFLAG_INDRAWABLE && surface_is_offscreen(dst_surface))
5040         dst_location = SFLAG_INTEXTURE;
5041
5042     /* Make sure the locations are up-to-date. Loading the destination
5043      * surface isn't required if the entire surface is overwritten. (And is
5044      * in fact harmful if we're being called by surface_load_location() with
5045      * the purpose of loading the destination surface.) */
5046     surface_load_location(src_surface, src_location, NULL);
5047     if (!surface_is_full_rect(dst_surface, &dst_rect))
5048         surface_load_location(dst_surface, dst_location, NULL);
5049
5050     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
5051     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
5052     else context = context_acquire(device, NULL);
5053
5054     if (!context->valid)
5055     {
5056         context_release(context);
5057         WARN("Invalid context, skipping blit.\n");
5058         return;
5059     }
5060
5061     gl_info = context->gl_info;
5062
5063     if (src_location == SFLAG_INDRAWABLE)
5064     {
5065         GLenum buffer = surface_get_gl_buffer(src_surface);
5066
5067         TRACE("Source surface %p is onscreen.\n", src_surface);
5068
5069         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
5070
5071         ENTER_GL();
5072         context_bind_fbo(context, GL_READ_FRAMEBUFFER, NULL);
5073         glReadBuffer(buffer);
5074         checkGLcall("glReadBuffer()");
5075     }
5076     else
5077     {
5078         TRACE("Source surface %p is offscreen.\n", src_surface);
5079         ENTER_GL();
5080         context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
5081         glReadBuffer(GL_COLOR_ATTACHMENT0);
5082         checkGLcall("glReadBuffer()");
5083     }
5084     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
5085     LEAVE_GL();
5086
5087     if (dst_location == SFLAG_INDRAWABLE)
5088     {
5089         GLenum buffer = surface_get_gl_buffer(dst_surface);
5090
5091         TRACE("Destination surface %p is onscreen.\n", dst_surface);
5092
5093         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5094
5095         ENTER_GL();
5096         context_bind_fbo(context, GL_DRAW_FRAMEBUFFER, NULL);
5097         context_set_draw_buffer(context, buffer);
5098     }
5099     else
5100     {
5101         TRACE("Destination surface %p is offscreen.\n", dst_surface);
5102
5103         ENTER_GL();
5104         context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
5105         context_set_draw_buffer(context, GL_COLOR_ATTACHMENT0);
5106     }
5107     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
5108     device_invalidate_state(device, STATE_FRAMEBUFFER);
5109
5110     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
5111     device_invalidate_state(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
5112     device_invalidate_state(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
5113     device_invalidate_state(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
5114     device_invalidate_state(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
5115
5116     glDisable(GL_SCISSOR_TEST);
5117     device_invalidate_state(device, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
5118
5119     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
5120             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
5121     checkGLcall("glBlitFramebuffer()");
5122
5123     LEAVE_GL();
5124
5125     if (wined3d_settings.strict_draw_ordering
5126             || (dst_location == SFLAG_INDRAWABLE
5127             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
5128         wglFlush();
5129
5130     context_release(context);
5131 }
5132
5133 static void surface_blt_to_drawable(struct wined3d_device *device,
5134         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5135         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5136         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5137 {
5138     struct wined3d_context *context;
5139     RECT src_rect, dst_rect;
5140
5141     src_rect = *src_rect_in;
5142     dst_rect = *dst_rect_in;
5143
5144     /* Make sure the surface is up-to-date. This should probably use
5145      * surface_load_location() and worry about the destination surface too,
5146      * unless we're overwriting it completely. */
5147     surface_internal_preload(src_surface, SRGB_RGB);
5148
5149     /* Activate the destination context, set it up for blitting */
5150     context = context_acquire(device, dst_surface);
5151     context_apply_blit_state(context, device);
5152
5153     if (!surface_is_offscreen(dst_surface))
5154         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5155
5156     device->blitter->set_shader(device->blit_priv, context->gl_info, src_surface);
5157
5158     ENTER_GL();
5159
5160     if (color_key)
5161     {
5162         glEnable(GL_ALPHA_TEST);
5163         checkGLcall("glEnable(GL_ALPHA_TEST)");
5164
5165         /* When the primary render target uses P8, the alpha component
5166          * contains the palette index. Which means that the colorkey is one of
5167          * the palette entries. In other cases pixels that should be masked
5168          * away have alpha set to 0. */
5169         if (primary_render_target_is_p8(device))
5170             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5171         else
5172             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5173         checkGLcall("glAlphaFunc");
5174     }
5175     else
5176     {
5177         glDisable(GL_ALPHA_TEST);
5178         checkGLcall("glDisable(GL_ALPHA_TEST)");
5179     }
5180
5181     draw_textured_quad(src_surface, &src_rect, &dst_rect, filter);
5182
5183     if (color_key)
5184     {
5185         glDisable(GL_ALPHA_TEST);
5186         checkGLcall("glDisable(GL_ALPHA_TEST)");
5187     }
5188
5189     LEAVE_GL();
5190
5191     /* Leave the opengl state valid for blitting */
5192     device->blitter->unset_shader(context->gl_info);
5193
5194     if (wined3d_settings.strict_draw_ordering
5195             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5196             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5197         wglFlush(); /* Flush to ensure ordering across contexts. */
5198
5199     context_release(context);
5200 }
5201
5202 /* Do not call while under the GL lock. */
5203 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5204 {
5205     struct wined3d_device *device = s->resource.device;
5206     const struct blit_shader *blitter;
5207
5208     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5209             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5210     if (!blitter)
5211     {
5212         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5213         return WINED3DERR_INVALIDCALL;
5214     }
5215
5216     return blitter->color_fill(device, s, rect, color);
5217 }
5218
5219 /* Do not call while under the GL lock. */
5220 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *DestRect,
5221         struct wined3d_surface *src_surface, const RECT *SrcRect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5222         WINED3DTEXTUREFILTERTYPE Filter)
5223 {
5224     struct wined3d_device *device = dst_surface->resource.device;
5225     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5226     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5227     RECT dst_rect, src_rect;
5228
5229     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5230             dst_surface, wine_dbgstr_rect(DestRect), src_surface, wine_dbgstr_rect(SrcRect),
5231             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5232
5233     /* Get the swapchain. One of the surfaces has to be a primary surface */
5234     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5235     {
5236         WARN("Destination is in sysmem, rejecting gl blt\n");
5237         return WINED3DERR_INVALIDCALL;
5238     }
5239
5240     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5241         dstSwapchain = dst_surface->container.u.swapchain;
5242
5243     if (src_surface)
5244     {
5245         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5246         {
5247             WARN("Src is in sysmem, rejecting gl blt\n");
5248             return WINED3DERR_INVALIDCALL;
5249         }
5250
5251         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5252             srcSwapchain = src_surface->container.u.swapchain;
5253     }
5254
5255     /* Early sort out of cases where no render target is used */
5256     if (!dstSwapchain && !srcSwapchain
5257             && src_surface != device->fb.render_targets[0]
5258             && dst_surface != device->fb.render_targets[0])
5259     {
5260         TRACE("No surface is render target, not using hardware blit.\n");
5261         return WINED3DERR_INVALIDCALL;
5262     }
5263
5264     /* No destination color keying supported */
5265     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5266     {
5267         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5268         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5269         return WINED3DERR_INVALIDCALL;
5270     }
5271
5272     surface_get_rect(dst_surface, DestRect, &dst_rect);
5273     if (src_surface) surface_get_rect(src_surface, SrcRect, &src_rect);
5274
5275     /* The only case where both surfaces on a swapchain are supported is a back buffer -> front buffer blit on the same swapchain */
5276     if (dstSwapchain && dstSwapchain == srcSwapchain && dstSwapchain->back_buffers
5277             && dst_surface == dstSwapchain->front_buffer
5278             && src_surface == dstSwapchain->back_buffers[0])
5279     {
5280         /* Half-Life does a Blt from the back buffer to the front buffer,
5281          * Full surface size, no flags... Use present instead
5282          *
5283          * This path will only be entered for d3d7 and ddraw apps, because d3d8/9 offer no way to blit TO the front buffer
5284          */
5285
5286         /* Check rects - wined3d_swapchain_present() doesn't handle them. */
5287         for (;;)
5288         {
5289             TRACE("Looking if a Present can be done...\n");
5290             /* Source Rectangle must be full surface */
5291             if (src_rect.left || src_rect.top
5292                     || src_rect.right != src_surface->resource.width
5293                     || src_rect.bottom != src_surface->resource.height)
5294             {
5295                 TRACE("No, Source rectangle doesn't match\n");
5296                 break;
5297             }
5298
5299             /* No stretching may occur */
5300             if(src_rect.right != dst_rect.right - dst_rect.left ||
5301                src_rect.bottom != dst_rect.bottom - dst_rect.top) {
5302                 TRACE("No, stretching is done\n");
5303                 break;
5304             }
5305
5306             /* Destination must be full surface or match the clipping rectangle */
5307             if (dst_surface->clipper && dst_surface->clipper->hWnd)
5308             {
5309                 RECT cliprect;
5310                 POINT pos[2];
5311                 GetClientRect(dst_surface->clipper->hWnd, &cliprect);
5312                 pos[0].x = dst_rect.left;
5313                 pos[0].y = dst_rect.top;
5314                 pos[1].x = dst_rect.right;
5315                 pos[1].y = dst_rect.bottom;
5316                 MapWindowPoints(GetDesktopWindow(), dst_surface->clipper->hWnd, pos, 2);
5317
5318                 if(pos[0].x != cliprect.left  || pos[0].y != cliprect.top   ||
5319                    pos[1].x != cliprect.right || pos[1].y != cliprect.bottom)
5320                 {
5321                     TRACE("No, dest rectangle doesn't match(clipper)\n");
5322                     TRACE("Clip rect at %s\n", wine_dbgstr_rect(&cliprect));
5323                     TRACE("Blt dest: %s\n", wine_dbgstr_rect(&dst_rect));
5324                     break;
5325                 }
5326             }
5327             else if (dst_rect.left || dst_rect.top
5328                     || dst_rect.right != dst_surface->resource.width
5329                     || dst_rect.bottom != dst_surface->resource.height)
5330             {
5331                 TRACE("No, dest rectangle doesn't match(surface size)\n");
5332                 break;
5333             }
5334
5335             TRACE("Yes\n");
5336
5337             /* These flags are unimportant for the flag check, remove them */
5338             if (!(flags & ~(WINEDDBLT_DONOTWAIT | WINEDDBLT_WAIT)))
5339             {
5340                 WINED3DSWAPEFFECT orig_swap = dstSwapchain->presentParms.SwapEffect;
5341
5342                 /* The idea behind this is that a glReadPixels and a glDrawPixels call
5343                     * take very long, while a flip is fast.
5344                     * This applies to Half-Life, which does such Blts every time it finished
5345                     * a frame, and to Prince of Persia 3D, which uses this to draw at least the main
5346                     * menu. This is also used by all apps when they do windowed rendering
5347                     *
5348                     * The problem is that flipping is not really the same as copying. After a
5349                     * Blt the front buffer is a copy of the back buffer, and the back buffer is
5350                     * untouched. Therefore it's necessary to override the swap effect
5351                     * and to set it back after the flip.
5352                     *
5353                     * Windowed Direct3D < 7 apps do the same. The D3D7 sdk demos are nice
5354                     * testcases.
5355                     */
5356
5357                 dstSwapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
5358                 dstSwapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
5359
5360                 TRACE("Full screen back buffer -> front buffer blt, performing a flip instead.\n");
5361                 wined3d_swapchain_present(dstSwapchain, NULL, NULL, dstSwapchain->win_handle, NULL, 0);
5362
5363                 dstSwapchain->presentParms.SwapEffect = orig_swap;
5364
5365                 return WINED3D_OK;
5366             }
5367             break;
5368         }
5369
5370         TRACE("Unsupported blit between buffers on the same swapchain\n");
5371         return WINED3DERR_INVALIDCALL;
5372     } else if(dstSwapchain && dstSwapchain == srcSwapchain) {
5373         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5374         return WINED3DERR_INVALIDCALL;
5375     } else if(dstSwapchain && srcSwapchain) {
5376         FIXME("Implement hardware blit between two different swapchains\n");
5377         return WINED3DERR_INVALIDCALL;
5378     }
5379     else if (dstSwapchain)
5380     {
5381         /* Handled with regular texture -> swapchain blit */
5382         if (src_surface == device->fb.render_targets[0])
5383             TRACE("Blit from active render target to a swapchain\n");
5384     }
5385     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5386     {
5387         FIXME("Implement blit from a swapchain to the active render target\n");
5388         return WINED3DERR_INVALIDCALL;
5389     }
5390
5391     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5392     {
5393         /* Blit from render target to texture */
5394         BOOL stretchx;
5395
5396         /* P8 read back is not implemented */
5397         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5398                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5399         {
5400             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5401             return WINED3DERR_INVALIDCALL;
5402         }
5403
5404         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5405         {
5406             TRACE("Color keying not supported by frame buffer to texture blit\n");
5407             return WINED3DERR_INVALIDCALL;
5408             /* Destination color key is checked above */
5409         }
5410
5411         if(dst_rect.right - dst_rect.left != src_rect.right - src_rect.left) {
5412             stretchx = TRUE;
5413         } else {
5414             stretchx = FALSE;
5415         }
5416
5417         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5418          * flip the image nor scale it.
5419          *
5420          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5421          * -> If the app wants a image width an unscaled width, copy it line per line
5422          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5423          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5424          *    back buffer. This is slower than reading line per line, thus not used for flipping
5425          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5426          *    pixel by pixel
5427          *
5428          * If EXT_framebuffer_blit is supported that can be used instead. Note that EXT_framebuffer_blit implies
5429          * FBO support, so it doesn't really make sense to try and make it work with different offscreen rendering
5430          * backends. */
5431         if (fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5432                 &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5433                 &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5434         {
5435             surface_blt_fbo(device, Filter,
5436                     src_surface, SFLAG_INDRAWABLE, &src_rect,
5437                     dst_surface, SFLAG_INDRAWABLE, &dst_rect);
5438             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5439         }
5440         else if (!stretchx || dst_rect.right - dst_rect.left > src_surface->resource.width
5441                 || dst_rect.bottom - dst_rect.top > src_surface->resource.height)
5442         {
5443             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5444             fb_copy_to_texture_direct(dst_surface, src_surface, &src_rect, &dst_rect, Filter);
5445         } else {
5446             TRACE("Using hardware stretching to flip / stretch the texture\n");
5447             fb_copy_to_texture_hwstretch(dst_surface, src_surface, &src_rect, &dst_rect, Filter);
5448         }
5449
5450         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5451         {
5452             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5453             dst_surface->resource.allocatedMemory = NULL;
5454             dst_surface->resource.heapMemory = NULL;
5455         }
5456         else
5457         {
5458             dst_surface->flags &= ~SFLAG_INSYSMEM;
5459         }
5460
5461         return WINED3D_OK;
5462     }
5463     else if (src_surface)
5464     {
5465         /* Blit from offscreen surface to render target */
5466         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5467         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5468
5469         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5470
5471         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5472                 && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5473                         &src_rect, src_surface->resource.usage, src_surface->resource.pool,
5474                         src_surface->resource.format,
5475                         &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5476                         dst_surface->resource.format))
5477         {
5478             TRACE("Using surface_blt_fbo.\n");
5479             /* The source is always a texture, but never the currently active render target, and the texture
5480              * contents are never upside down. */
5481             surface_blt_fbo(device, Filter,
5482                     src_surface, SFLAG_INDRAWABLE, &src_rect,
5483                     dst_surface, SFLAG_INDRAWABLE, &dst_rect);
5484             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5485             return WINED3D_OK;
5486         }
5487
5488         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5489                 && arbfp_blit.blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5490                         &src_rect, src_surface->resource.usage, src_surface->resource.pool,
5491                         src_surface->resource.format,
5492                         &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5493                         dst_surface->resource.format))
5494         {
5495             return arbfp_blit_surface(device, src_surface, &src_rect, dst_surface, &dst_rect,
5496                     WINED3D_BLIT_OP_COLOR_BLIT, Filter);
5497         }
5498
5499         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5500                 &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5501                 &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5502         {
5503             FIXME("Unsupported blit operation falling back to software\n");
5504             return WINED3DERR_INVALIDCALL;
5505         }
5506
5507         /* Color keying: Check if we have to do a color keyed blt,
5508          * and if not check if a color key is activated.
5509          *
5510          * Just modify the color keying parameters in the surface and restore them afterwards
5511          * The surface keeps track of the color key last used to load the opengl surface.
5512          * PreLoad will catch the change to the flags and color key and reload if necessary.
5513          */
5514         if (flags & WINEDDBLT_KEYSRC)
5515         {
5516             /* Use color key from surface */
5517         }
5518         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5519         {
5520             /* Use color key from DDBltFx */
5521             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5522             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5523         }
5524         else
5525         {
5526             /* Do not use color key */
5527             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5528         }
5529
5530         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5531                 src_surface, &src_rect, dst_surface, &dst_rect);
5532
5533         /* Restore the color key parameters */
5534         src_surface->CKeyFlags = oldCKeyFlags;
5535         src_surface->SrcBltCKey = oldBltCKey;
5536
5537         surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5538
5539         return WINED3D_OK;
5540     }
5541     else
5542     {
5543         /* Source-Less Blit to render target */
5544         if (flags & WINEDDBLT_COLORFILL)
5545         {
5546             WINED3DCOLORVALUE color;
5547
5548             TRACE("Colorfill\n");
5549
5550             /* The color as given in the Blt function is in the surface format. */
5551             if (!surface_convert_color_to_float(dst_surface, DDBltFx->u5.dwFillColor, &color))
5552                 return WINED3DERR_INVALIDCALL;
5553
5554             return surface_color_fill(dst_surface, &dst_rect, &color);
5555         }
5556     }
5557
5558     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5559     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5560     return WINED3DERR_INVALIDCALL;
5561 }
5562
5563 /* GL locking is done by the caller */
5564 static void surface_depth_blt(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
5565         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5566 {
5567     struct wined3d_device *device = surface->resource.device;
5568     GLint compare_mode = GL_NONE;
5569     struct blt_info info;
5570     GLint old_binding = 0;
5571     RECT rect;
5572
5573     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5574
5575     glDisable(GL_CULL_FACE);
5576     glDisable(GL_BLEND);
5577     glDisable(GL_ALPHA_TEST);
5578     glDisable(GL_SCISSOR_TEST);
5579     glDisable(GL_STENCIL_TEST);
5580     glEnable(GL_DEPTH_TEST);
5581     glDepthFunc(GL_ALWAYS);
5582     glDepthMask(GL_TRUE);
5583     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5584     glViewport(x, y, w, h);
5585
5586     SetRect(&rect, 0, h, w, 0);
5587     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5588     GL_EXTCALL(glActiveTextureARB(GL_TEXTURE0_ARB));
5589     glGetIntegerv(info.binding, &old_binding);
5590     glBindTexture(info.bind_target, texture);
5591     if (gl_info->supported[ARB_SHADOW])
5592     {
5593         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5594         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5595     }
5596
5597     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5598             gl_info, info.tex_type, &surface->ds_current_size);
5599
5600     glBegin(GL_TRIANGLE_STRIP);
5601     glTexCoord3fv(info.coords[0]);
5602     glVertex2f(-1.0f, -1.0f);
5603     glTexCoord3fv(info.coords[1]);
5604     glVertex2f(1.0f, -1.0f);
5605     glTexCoord3fv(info.coords[2]);
5606     glVertex2f(-1.0f, 1.0f);
5607     glTexCoord3fv(info.coords[3]);
5608     glVertex2f(1.0f, 1.0f);
5609     glEnd();
5610
5611     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5612     glBindTexture(info.bind_target, old_binding);
5613
5614     glPopAttrib();
5615
5616     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5617 }
5618
5619 void surface_modify_ds_location(struct wined3d_surface *surface,
5620         DWORD location, UINT w, UINT h)
5621 {
5622     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5623
5624     if (location & ~SFLAG_DS_LOCATIONS)
5625         FIXME("Invalid location (%#x) specified.\n", location);
5626
5627     surface->ds_current_size.cx = w;
5628     surface->ds_current_size.cy = h;
5629     surface->flags &= ~SFLAG_DS_LOCATIONS;
5630     surface->flags |= location;
5631 }
5632
5633 /* Context activation is done by the caller. */
5634 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5635 {
5636     struct wined3d_device *device = surface->resource.device;
5637     const struct wined3d_gl_info *gl_info = context->gl_info;
5638     GLsizei w, h;
5639
5640     TRACE("surface %p, new location %#x.\n", surface, location);
5641
5642     /* TODO: Make this work for modes other than FBO */
5643     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5644
5645     if (!(surface->flags & location))
5646     {
5647         w = surface->ds_current_size.cx;
5648         h = surface->ds_current_size.cy;
5649         surface->ds_current_size.cx = 0;
5650         surface->ds_current_size.cy = 0;
5651     }
5652     else
5653     {
5654         w = surface->resource.width;
5655         h = surface->resource.height;
5656     }
5657
5658     if (surface->ds_current_size.cx == surface->resource.width
5659             && surface->ds_current_size.cy == surface->resource.height)
5660     {
5661         TRACE("Location (%#x) is already up to date.\n", location);
5662         return;
5663     }
5664
5665     if (surface->current_renderbuffer)
5666     {
5667         FIXME("Not supported with fixed up depth stencil.\n");
5668         return;
5669     }
5670
5671     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5672     {
5673         /* This mostly happens when a depth / stencil is used without being
5674          * cleared first. In principle we could upload from sysmem, or
5675          * explicitly clear before first usage. For the moment there don't
5676          * appear to be a lot of applications depending on this, so a FIXME
5677          * should do. */
5678         FIXME("No up to date depth stencil location.\n");
5679         surface->flags |= location;
5680         surface->ds_current_size.cx = surface->resource.width;
5681         surface->ds_current_size.cy = surface->resource.height;
5682         return;
5683     }
5684
5685     if (location == SFLAG_DS_OFFSCREEN)
5686     {
5687         GLint old_binding = 0;
5688         GLenum bind_target;
5689
5690         /* The render target is allowed to be smaller than the depth/stencil
5691          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5692          * than the offscreen surface. Don't overwrite the offscreen surface
5693          * with undefined data. */
5694         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5695         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5696
5697         TRACE("Copying onscreen depth buffer to depth texture.\n");
5698
5699         ENTER_GL();
5700
5701         if (!device->depth_blt_texture)
5702         {
5703             glGenTextures(1, &device->depth_blt_texture);
5704         }
5705
5706         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5707          * directly on the FBO texture. That's because we need to flip. */
5708         context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
5709         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5710         {
5711             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5712             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5713         }
5714         else
5715         {
5716             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5717             bind_target = GL_TEXTURE_2D;
5718         }
5719         glBindTexture(bind_target, device->depth_blt_texture);
5720         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5721          * internal format, because the internal format might include stencil
5722          * data. In principle we should copy stencil data as well, but unless
5723          * the driver supports stencil export it's hard to do, and doesn't
5724          * seem to be needed in practice. If the hardware doesn't support
5725          * writing stencil data, the glCopyTexImage2D() call might trigger
5726          * software fallbacks. */
5727         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5728         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5729         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5730         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5731         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5732         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5733         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5734         glBindTexture(bind_target, old_binding);
5735
5736         /* Setup the destination */
5737         if (!device->depth_blt_rb)
5738         {
5739             gl_info->fbo_ops.glGenRenderbuffers(1, &device->depth_blt_rb);
5740             checkGLcall("glGenRenderbuffersEXT");
5741         }
5742         if (device->depth_blt_rb_w != w || device->depth_blt_rb_h != h)
5743         {
5744             gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, device->depth_blt_rb);
5745             checkGLcall("glBindRenderbufferEXT");
5746             gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, w, h);
5747             checkGLcall("glRenderbufferStorageEXT");
5748             device->depth_blt_rb_w = w;
5749             device->depth_blt_rb_h = h;
5750         }
5751
5752         context_bind_fbo(context, GL_FRAMEBUFFER, &context->dst_fbo);
5753         gl_info->fbo_ops.glFramebufferRenderbuffer(GL_FRAMEBUFFER,
5754                 GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, device->depth_blt_rb);
5755         checkGLcall("glFramebufferRenderbufferEXT");
5756         context_attach_depth_stencil_fbo(context, GL_FRAMEBUFFER, surface, FALSE);
5757
5758         /* Do the actual blit */
5759         surface_depth_blt(surface, gl_info, device->depth_blt_texture, 0, 0, w, h, bind_target);
5760         checkGLcall("depth_blt");
5761
5762         if (context->current_fbo) context_bind_fbo(context, GL_FRAMEBUFFER, &context->current_fbo->id);
5763         else context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
5764
5765         LEAVE_GL();
5766
5767         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5768     }
5769     else if (location == SFLAG_DS_ONSCREEN)
5770     {
5771         TRACE("Copying depth texture to onscreen depth buffer.\n");
5772
5773         ENTER_GL();
5774
5775         context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
5776         surface_depth_blt(surface, gl_info, surface->texture_name,
5777                 0, surface->pow2Height - h, w, h, surface->texture_target);
5778         checkGLcall("depth_blt");
5779
5780         if (context->current_fbo) context_bind_fbo(context, GL_FRAMEBUFFER, &context->current_fbo->id);
5781
5782         LEAVE_GL();
5783
5784         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5785     }
5786     else
5787     {
5788         ERR("Invalid location (%#x) specified.\n", location);
5789     }
5790
5791     surface->flags |= location;
5792     surface->ds_current_size.cx = surface->resource.width;
5793     surface->ds_current_size.cy = surface->resource.height;
5794 }
5795
5796 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5797 {
5798     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5799     struct wined3d_surface *overlay;
5800
5801     TRACE("surface %p, location %s, persistent %#x.\n",
5802             surface, debug_surflocation(location), persistent);
5803
5804     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
5805     {
5806         if (surface_is_offscreen(surface))
5807         {
5808             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same
5809              * for offscreen targets. */
5810             if (location & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE))
5811                 location |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
5812         }
5813         else
5814         {
5815             TRACE("Surface %p is an onscreen surface.\n", surface);
5816         }
5817     }
5818
5819     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5820             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5821         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5822
5823     if (persistent)
5824     {
5825         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5826                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5827         {
5828             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5829             {
5830                 TRACE("Passing to container.\n");
5831                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5832             }
5833         }
5834         surface->flags &= ~SFLAG_LOCATIONS;
5835         surface->flags |= location;
5836
5837         /* Redraw emulated overlays, if any */
5838         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5839         {
5840             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5841             {
5842                 overlay->surface_ops->surface_draw_overlay(overlay);
5843             }
5844         }
5845     }
5846     else
5847     {
5848         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5849         {
5850             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5851             {
5852                 TRACE("Passing to container\n");
5853                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5854             }
5855         }
5856         surface->flags &= ~location;
5857     }
5858
5859     if (!(surface->flags & SFLAG_LOCATIONS))
5860     {
5861         ERR("Surface %p does not have any up to date location.\n", surface);
5862     }
5863 }
5864
5865 static DWORD resource_access_from_location(DWORD location)
5866 {
5867     switch (location)
5868     {
5869         case SFLAG_INSYSMEM:
5870             return WINED3D_RESOURCE_ACCESS_CPU;
5871
5872         case SFLAG_INDRAWABLE:
5873         case SFLAG_INSRGBTEX:
5874         case SFLAG_INTEXTURE:
5875             return WINED3D_RESOURCE_ACCESS_GPU;
5876
5877         default:
5878             FIXME("Unhandled location %#x.\n", location);
5879             return 0;
5880     }
5881 }
5882
5883 static void surface_load_sysmem(struct wined3d_surface *surface,
5884         const struct wined3d_gl_info *gl_info, const RECT *rect)
5885 {
5886     surface_prepare_system_memory(surface);
5887
5888     /* Download the surface to system memory. */
5889     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5890     {
5891         struct wined3d_device *device = surface->resource.device;
5892         struct wined3d_context *context = NULL;
5893
5894         if (!device->isInDraw)
5895             context = context_acquire(device, NULL);
5896
5897         surface_bind_and_dirtify(surface, gl_info, !(surface->flags & SFLAG_INTEXTURE));
5898         surface_download_data(surface, gl_info);
5899
5900         if (context)
5901             context_release(context);
5902
5903         return;
5904     }
5905
5906     /* Note: It might be faster to download into a texture first. */
5907     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5908             wined3d_surface_get_pitch(surface));
5909 }
5910
5911 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5912         const struct wined3d_gl_info *gl_info, const RECT *rect)
5913 {
5914     struct wined3d_device *device = surface->resource.device;
5915     struct wined3d_format format;
5916     CONVERT_TYPES convert;
5917     UINT byte_count;
5918     BYTE *mem;
5919
5920     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5921         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5922
5923     if (surface->flags & SFLAG_INTEXTURE)
5924     {
5925         RECT r;
5926
5927         surface_get_rect(surface, rect, &r);
5928         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5929
5930         return WINED3D_OK;
5931     }
5932
5933     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5934     {
5935         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5936          * path through sysmem. */
5937         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5938     }
5939
5940     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5941
5942     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5943      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5944      * called. */
5945     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5946     {
5947         struct wined3d_context *context = NULL;
5948
5949         TRACE("Removing the pbo attached to surface %p.\n", surface);
5950
5951         if (!device->isInDraw)
5952             context = context_acquire(device, NULL);
5953
5954         surface_remove_pbo(surface, gl_info);
5955
5956         if (context)
5957             context_release(context);
5958     }
5959
5960     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5961     {
5962         UINT height = surface->resource.height;
5963         UINT width = surface->resource.width;
5964         UINT src_pitch, dst_pitch;
5965
5966         byte_count = format.conv_byte_count;
5967         src_pitch = wined3d_surface_get_pitch(surface);
5968
5969         /* Stick to the alignment for the converted surface too, makes it
5970          * easier to load the surface. */
5971         dst_pitch = width * byte_count;
5972         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5973
5974         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5975         {
5976             ERR("Out of memory (%u).\n", dst_pitch * height);
5977             return E_OUTOFMEMORY;
5978         }
5979
5980         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5981                 src_pitch, width, height, dst_pitch, convert, surface);
5982
5983         surface->flags |= SFLAG_CONVERTED;
5984     }
5985     else
5986     {
5987         surface->flags &= ~SFLAG_CONVERTED;
5988         mem = surface->resource.allocatedMemory;
5989         byte_count = format.byte_count;
5990     }
5991
5992     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5993
5994     /* Don't delete PBO memory. */
5995     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5996         HeapFree(GetProcessHeap(), 0, mem);
5997
5998     return WINED3D_OK;
5999 }
6000
6001 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6002         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6003 {
6004     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
6005     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6006     struct wined3d_device *device = surface->resource.device;
6007     struct wined3d_context *context = NULL;
6008     UINT width, src_pitch, dst_pitch;
6009     struct wined3d_bo_address data;
6010     struct wined3d_format format;
6011     POINT dst_point = {0, 0};
6012     CONVERT_TYPES convert;
6013     BYTE *mem;
6014
6015     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6016             && surface_is_offscreen(surface)
6017             && (surface->flags & SFLAG_INDRAWABLE))
6018     {
6019         read_from_framebuffer_texture(surface, srgb);
6020
6021         return WINED3D_OK;
6022     }
6023
6024     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6025             && (surface->resource.format->flags & attach_flags) == attach_flags
6026             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6027                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6028                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6029     {
6030         if (srgb)
6031             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
6032                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6033         else
6034             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
6035                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6036
6037         return WINED3D_OK;
6038     }
6039
6040     /* Upload from system memory */
6041
6042     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6043             TRUE /* We will use textures */, &format, &convert);
6044
6045     if (srgb)
6046     {
6047         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6048         {
6049             /* Performance warning... */
6050             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6051             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6052         }
6053     }
6054     else
6055     {
6056         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6057         {
6058             /* Performance warning... */
6059             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6060             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6061         }
6062     }
6063
6064     if (!(surface->flags & SFLAG_INSYSMEM))
6065     {
6066         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6067         /* Lets hope we get it from somewhere... */
6068         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6069     }
6070
6071     if (!device->isInDraw)
6072         context = context_acquire(device, NULL);
6073
6074     surface_prepare_texture(surface, gl_info, srgb);
6075     surface_bind_and_dirtify(surface, gl_info, srgb);
6076
6077     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6078     {
6079         surface->flags |= SFLAG_GLCKEY;
6080         surface->glCKey = surface->SrcBltCKey;
6081     }
6082     else surface->flags &= ~SFLAG_GLCKEY;
6083
6084     width = surface->resource.width;
6085     src_pitch = wined3d_surface_get_pitch(surface);
6086
6087     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6088      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6089      * called. */
6090     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6091     {
6092         TRACE("Removing the pbo attached to surface %p.\n", surface);
6093         surface_remove_pbo(surface, gl_info);
6094     }
6095
6096     if (format.convert)
6097     {
6098         /* This code is entered for texture formats which need a fixup. */
6099         UINT height = surface->resource.height;
6100
6101         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6102         dst_pitch = width * format.conv_byte_count;
6103         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6104
6105         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6106         {
6107             ERR("Out of memory (%u).\n", dst_pitch * height);
6108             if (context)
6109                 context_release(context);
6110             return E_OUTOFMEMORY;
6111         }
6112         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6113     }
6114     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6115     {
6116         /* This code is only entered for color keying fixups */
6117         UINT height = surface->resource.height;
6118
6119         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6120         dst_pitch = width * format.conv_byte_count;
6121         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6122
6123         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6124         {
6125             ERR("Out of memory (%u).\n", dst_pitch * height);
6126             if (context)
6127                 context_release(context);
6128             return E_OUTOFMEMORY;
6129         }
6130         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6131                 width, height, dst_pitch, convert, surface);
6132     }
6133     else
6134     {
6135         mem = surface->resource.allocatedMemory;
6136     }
6137
6138     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6139     data.addr = mem;
6140     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6141
6142     if (context)
6143         context_release(context);
6144
6145     /* Don't delete PBO memory. */
6146     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6147         HeapFree(GetProcessHeap(), 0, mem);
6148
6149     return WINED3D_OK;
6150 }
6151
6152 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6153 {
6154     struct wined3d_device *device = surface->resource.device;
6155     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6156     BOOL in_fbo = FALSE;
6157     HRESULT hr;
6158
6159     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6160
6161     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6162     {
6163         if (location == SFLAG_INTEXTURE)
6164         {
6165             struct wined3d_context *context = context_acquire(device, NULL);
6166             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6167             context_release(context);
6168             return WINED3D_OK;
6169         }
6170         else
6171         {
6172             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6173             return WINED3DERR_INVALIDCALL;
6174         }
6175     }
6176
6177     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6178     {
6179         if (surface_is_offscreen(surface))
6180         {
6181             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same
6182              * for offscreen targets. Prefer SFLAG_INTEXTURE. */
6183             if (location == SFLAG_INDRAWABLE)
6184                 location = SFLAG_INTEXTURE;
6185             in_fbo = TRUE;
6186         }
6187         else
6188         {
6189             TRACE("Surface %p is an onscreen surface.\n", surface);
6190         }
6191     }
6192
6193     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6194         location = SFLAG_INTEXTURE;
6195
6196     if (surface->flags & location)
6197     {
6198         TRACE("Location already up to date.\n");
6199         return WINED3D_OK;
6200     }
6201
6202     if (WARN_ON(d3d_surface))
6203     {
6204         DWORD required_access = resource_access_from_location(location);
6205         if ((surface->resource.access_flags & required_access) != required_access)
6206             WARN("Operation requires %#x access, but surface only has %#x.\n",
6207                     required_access, surface->resource.access_flags);
6208     }
6209
6210     if (!(surface->flags & SFLAG_LOCATIONS))
6211     {
6212         ERR("Surface %p does not have any up to date location.\n", surface);
6213         surface->flags |= SFLAG_LOST;
6214         return WINED3DERR_DEVICELOST;
6215     }
6216
6217     switch (location)
6218     {
6219         case SFLAG_INSYSMEM:
6220             surface_load_sysmem(surface, gl_info, rect);
6221             break;
6222
6223         case SFLAG_INDRAWABLE:
6224             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6225                 return hr;
6226             break;
6227
6228         case SFLAG_INTEXTURE:
6229         case SFLAG_INSRGBTEX:
6230             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6231                 return hr;
6232             break;
6233
6234         default:
6235             ERR("Don't know how to handle location %#x.\n", location);
6236             break;
6237     }
6238
6239     if (!rect)
6240     {
6241         surface->flags |= location;
6242
6243         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6244             surface_evict_sysmem(surface);
6245     }
6246
6247     if (in_fbo && (surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)))
6248     {
6249         /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
6250         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
6251     }
6252
6253     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6254             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6255     {
6256         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6257     }
6258
6259     return WINED3D_OK;
6260 }
6261
6262 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6263 {
6264     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6265
6266     /* Not on a swapchain - must be offscreen */
6267     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6268
6269     /* The front buffer is always onscreen */
6270     if (surface == swapchain->front_buffer) return FALSE;
6271
6272     /* If the swapchain is rendered to an FBO, the backbuffer is
6273      * offscreen, otherwise onscreen */
6274     return swapchain->render_to_fbo;
6275 }
6276
6277 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Release hook of the fixed-function blitter; intentionally a no-op.
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6280
6281 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6282 /* Context activation is done by the caller. */
6283 static void ffp_blit_p8_upload_palette(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6284 {
6285     BYTE table[256][4];
6286     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6287
6288     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6289
6290     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6291     ENTER_GL();
6292     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6293     LEAVE_GL();
6294 }
6295
6296 /* Context activation is done by the caller. */
6297 static HRESULT ffp_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6298 {
6299     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6300
6301     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6302      * else the surface is converted in software at upload time in LoadLocation.
6303      */
6304     if(fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6305         ffp_blit_p8_upload_palette(surface, gl_info);
6306
6307     ENTER_GL();
6308     glEnable(surface->texture_target);
6309     checkGLcall("glEnable(surface->texture_target)");
6310     LEAVE_GL();
6311     return WINED3D_OK;
6312 }
6313
6314 /* Context activation is done by the caller. */
6315 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6316 {
6317     ENTER_GL();
6318     glDisable(GL_TEXTURE_2D);
6319     checkGLcall("glDisable(GL_TEXTURE_2D)");
6320     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6321     {
6322         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6323         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6324     }
6325     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6326     {
6327         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6328         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6329     }
6330     LEAVE_GL();
6331 }
6332
6333 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6334         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6335         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6336 {
6337     enum complex_fixup src_fixup;
6338
6339     switch (blit_op)
6340     {
6341         case WINED3D_BLIT_OP_COLOR_BLIT:
6342             src_fixup = get_complex_fixup(src_format->color_fixup);
6343             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6344             {
6345                 TRACE("Checking support for fixup:\n");
6346                 dump_color_fixup_desc(src_format->color_fixup);
6347             }
6348
6349             if (!is_identity_fixup(dst_format->color_fixup))
6350             {
6351                 TRACE("Destination fixups are not supported\n");
6352                 return FALSE;
6353             }
6354
6355             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6356             {
6357                 TRACE("P8 fixup supported\n");
6358                 return TRUE;
6359             }
6360
6361             /* We only support identity conversions. */
6362             if (is_identity_fixup(src_format->color_fixup))
6363             {
6364                 TRACE("[OK]\n");
6365                 return TRUE;
6366             }
6367
6368             TRACE("[FAILED]\n");
6369             return FALSE;
6370
6371         case WINED3D_BLIT_OP_COLOR_FILL:
6372             if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6373             {
6374                 TRACE("Color fill not supported\n");
6375                 return FALSE;
6376             }
6377
6378             return TRUE;
6379
6380         case WINED3D_BLIT_OP_DEPTH_FILL:
6381             return TRUE;
6382
6383         default:
6384             TRACE("Unsupported blit_op=%d\n", blit_op);
6385             return FALSE;
6386     }
6387 }
6388
6389 /* Do not call while under the GL lock. */
6390 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6391         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6392 {
6393     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6394     struct wined3d_fb_state fb = {&dst_surface, NULL};
6395
6396     return device_clear_render_targets(device, 1, &fb,
6397             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6398 }
6399
6400 /* Do not call while under the GL lock. */
6401 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6402         struct wined3d_surface *surface, const RECT *rect, float depth)
6403 {
6404     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6405     struct wined3d_fb_state fb = {NULL, surface};
6406
6407     return device_clear_render_targets(device, 0, &fb,
6408             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6409 }
6410
6411 const struct blit_shader ffp_blit =  {
6412     ffp_blit_alloc,
6413     ffp_blit_free,
6414     ffp_blit_set,
6415     ffp_blit_unset,
6416     ffp_blit_supported,
6417     ffp_blit_color_fill,
6418     ffp_blit_depth_fill,
6419 };
6420
6421 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6422 {
6423     return WINED3D_OK;
6424 }
6425
/* Release hook of the CPU blitter; intentionally a no-op.
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Nothing to do. */
}
6430
6431 /* Context activation is done by the caller. */
6432 static HRESULT cpu_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6433 {
6434     return WINED3D_OK;
6435 }
6436
/* Teardown hook of the CPU blitter; intentionally a no-op.
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* Nothing to do. */
}
6441
6442 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6443         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6444         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6445 {
6446     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6447     {
6448         return TRUE;
6449     }
6450
6451     return FALSE;
6452 }
6453
6454 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6455         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6456         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6457 {
6458     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6459     const struct wined3d_format *src_format, *dst_format;
6460     struct wined3d_surface *orig_src = src_surface;
6461     WINED3DLOCKED_RECT dlock, slock;
6462     HRESULT hr = WINED3D_OK;
6463     const BYTE *sbuf;
6464     RECT xdst,xsrc;
6465     BYTE *dbuf;
6466     int x, y;
6467
6468     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6469             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6470             flags, fx, debug_d3dtexturefiltertype(filter));
6471
6472     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
6473     {
6474         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY\n");
6475         return WINEDDERR_SURFACEBUSY;
6476     }
6477
6478     /* First check for the validity of source / destination rectangles.
6479      * This was verified using a test application and by MSDN. */
6480     if (src_rect)
6481     {
6482         if (src_surface)
6483         {
6484             if (src_rect->right < src_rect->left || src_rect->bottom < src_rect->top
6485                     || src_rect->left > src_surface->resource.width || src_rect->left < 0
6486                     || src_rect->top > src_surface->resource.height || src_rect->top < 0
6487                     || src_rect->right > src_surface->resource.width || src_rect->right < 0
6488                     || src_rect->bottom > src_surface->resource.height || src_rect->bottom < 0)
6489             {
6490                 WARN("Application gave us bad source rectangle for Blt.\n");
6491                 return WINEDDERR_INVALIDRECT;
6492             }
6493
6494             if (!src_rect->right || !src_rect->bottom
6495                     || src_rect->left == (int)src_surface->resource.width
6496                     || src_rect->top == (int)src_surface->resource.height)
6497             {
6498                 TRACE("Nothing to be done.\n");
6499                 return WINED3D_OK;
6500             }
6501         }
6502
6503         xsrc = *src_rect;
6504     }
6505     else if (src_surface)
6506     {
6507         xsrc.left = 0;
6508         xsrc.top = 0;
6509         xsrc.right = src_surface->resource.width;
6510         xsrc.bottom = src_surface->resource.height;
6511     }
6512     else
6513     {
6514         memset(&xsrc, 0, sizeof(xsrc));
6515     }
6516
6517     if (dst_rect)
6518     {
6519         /* For the Destination rect, it can be out of bounds on the condition
6520          * that a clipper is set for the given surface. */
6521         if (!dst_surface->clipper && (dst_rect->right < dst_rect->left || dst_rect->bottom < dst_rect->top
6522                 || dst_rect->left > dst_surface->resource.width || dst_rect->left < 0
6523                 || dst_rect->top > dst_surface->resource.height || dst_rect->top < 0
6524                 || dst_rect->right > dst_surface->resource.width || dst_rect->right < 0
6525                 || dst_rect->bottom > dst_surface->resource.height || dst_rect->bottom < 0))
6526         {
6527             WARN("Application gave us bad destination rectangle for Blt without a clipper set.\n");
6528             return WINEDDERR_INVALIDRECT;
6529         }
6530
6531         if (dst_rect->right <= 0 || dst_rect->bottom <= 0
6532                 || dst_rect->left >= (int)dst_surface->resource.width
6533                 || dst_rect->top >= (int)dst_surface->resource.height)
6534         {
6535             TRACE("Nothing to be done.\n");
6536             return WINED3D_OK;
6537         }
6538
6539         if (!src_surface)
6540         {
6541             RECT full_rect;
6542
6543             full_rect.left = 0;
6544             full_rect.top = 0;
6545             full_rect.right = dst_surface->resource.width;
6546             full_rect.bottom = dst_surface->resource.height;
6547             IntersectRect(&xdst, &full_rect, dst_rect);
6548         }
6549         else
6550         {
6551             BOOL clip_horiz, clip_vert;
6552
6553             xdst = *dst_rect;
6554             clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6555             clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6556
6557             if (clip_vert || clip_horiz)
6558             {
6559                 /* Now check if this is a special case or not... */
6560                 if ((flags & WINEDDBLT_DDFX)
6561                         || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6562                         || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6563                 {
6564                     WARN("Out of screen rectangle in special case. Not handled right now.\n");
6565                     return WINED3D_OK;
6566                 }
6567
6568                 if (clip_horiz)
6569                 {
6570                     if (xdst.left < 0)
6571                     {
6572                         xsrc.left -= xdst.left;
6573                         xdst.left = 0;
6574                     }
6575                     if (xdst.right > dst_surface->resource.width)
6576                     {
6577                         xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6578                         xdst.right = (int)dst_surface->resource.width;
6579                     }
6580                 }
6581
6582                 if (clip_vert)
6583                 {
6584                     if (xdst.top < 0)
6585                     {
6586                         xsrc.top -= xdst.top;
6587                         xdst.top = 0;
6588                     }
6589                     if (xdst.bottom > dst_surface->resource.height)
6590                     {
6591                         xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6592                         xdst.bottom = (int)dst_surface->resource.height;
6593                     }
6594                 }
6595
6596                 /* And check if after clipping something is still to be done... */
6597                 if ((xdst.right <= 0) || (xdst.bottom <= 0)
6598                         || (xdst.left >= (int)dst_surface->resource.width)
6599                         || (xdst.top >= (int)dst_surface->resource.height)
6600                         || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6601                         || (xsrc.left >= (int)src_surface->resource.width)
6602                         || (xsrc.top >= (int)src_surface->resource.height))
6603                 {
6604                     TRACE("Nothing to be done after clipping.\n");
6605                     return WINED3D_OK;
6606                 }
6607             }
6608         }
6609     }
6610     else
6611     {
6612         xdst.left = 0;
6613         xdst.top = 0;
6614         xdst.right = dst_surface->resource.width;
6615         xdst.bottom = dst_surface->resource.height;
6616     }
6617
6618     if (src_surface == dst_surface)
6619     {
6620         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6621         slock = dlock;
6622         src_format = dst_surface->resource.format;
6623         dst_format = src_format;
6624     }
6625     else
6626     {
6627         dst_format = dst_surface->resource.format;
6628         if (src_surface)
6629         {
6630             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6631             {
6632                 src_surface = surface_convert_format(src_surface, dst_format->id);
6633                 if (!src_surface)
6634                 {
6635                     /* The conv function writes a FIXME */
6636                     WARN("Cannot convert source surface format to dest format.\n");
6637                     goto release;
6638                 }
6639             }
6640             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6641             src_format = src_surface->resource.format;
6642         }
6643         else
6644         {
6645             src_format = dst_format;
6646         }
6647         if (dst_rect)
6648             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6649         else
6650             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6651     }
6652
6653     if (!fx || !(fx->dwDDFX)) flags &= ~WINEDDBLT_DDFX;
6654
6655     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_FOURCC)
6656     {
6657         if (!dst_rect || src_surface == dst_surface)
6658         {
6659             memcpy(dlock.pBits, slock.pBits, dst_surface->resource.size);
6660             goto release;
6661         }
6662     }
6663
6664     bpp = dst_surface->resource.format->byte_count;
6665     srcheight = xsrc.bottom - xsrc.top;
6666     srcwidth = xsrc.right - xsrc.left;
6667     dstheight = xdst.bottom - xdst.top;
6668     dstwidth = xdst.right - xdst.left;
6669     width = (xdst.right - xdst.left) * bpp;
6670
6671     if (dst_rect && src_surface != dst_surface)
6672         dbuf = dlock.pBits;
6673     else
6674         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6675
6676     if (flags & WINEDDBLT_WAIT)
6677     {
6678         flags &= ~WINEDDBLT_WAIT;
6679     }
6680     if (flags & WINEDDBLT_ASYNC)
6681     {
6682         static BOOL displayed = FALSE;
6683         if (!displayed)
6684             FIXME("Can't handle WINEDDBLT_ASYNC flag right now.\n");
6685         displayed = TRUE;
6686         flags &= ~WINEDDBLT_ASYNC;
6687     }
6688     if (flags & WINEDDBLT_DONOTWAIT)
6689     {
6690         /* WINEDDBLT_DONOTWAIT appeared in DX7 */
6691         static BOOL displayed = FALSE;
6692         if (!displayed)
6693             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag right now.\n");
6694         displayed = TRUE;
6695         flags &= ~WINEDDBLT_DONOTWAIT;
6696     }
6697
6698     /* First, all the 'source-less' blits */
6699     if (flags & WINEDDBLT_COLORFILL)
6700     {
6701         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6702         flags &= ~WINEDDBLT_COLORFILL;
6703     }
6704
6705     if (flags & WINEDDBLT_DEPTHFILL)
6706     {
6707         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6708     }
6709     if (flags & WINEDDBLT_ROP)
6710     {
6711         /* Catch some degenerate cases here. */
6712         switch (fx->dwROP)
6713         {
6714             case BLACKNESS:
6715                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6716                 break;
6717             case 0xAA0029: /* No-op */
6718                 break;
6719             case WHITENESS:
6720                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6721                 break;
6722             case SRCCOPY: /* Well, we do that below? */
6723                 break;
6724             default:
6725                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6726                 goto error;
6727         }
6728         flags &= ~WINEDDBLT_ROP;
6729     }
6730     if (flags & WINEDDBLT_DDROPS)
6731     {
6732         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6733     }
6734     /* Now the 'with source' blits. */
6735     if (src_surface)
6736     {
6737         const BYTE *sbase;
6738         int sx, xinc, sy, yinc;
6739
6740         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6741             goto release;
6742
6743         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6744                 && (srcwidth != dstwidth || srcheight != dstheight))
6745         {
6746             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6747             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6748         }
6749
6750         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6751         xinc = (srcwidth << 16) / dstwidth;
6752         yinc = (srcheight << 16) / dstheight;
6753
6754         if (!flags)
6755         {
6756             /* No effects, we can cheat here. */
6757             if (dstwidth == srcwidth)
6758             {
6759                 if (dstheight == srcheight)
6760                 {
6761                     /* No stretching in either direction. This needs to be as
6762                      * fast as possible. */
6763                     sbuf = sbase;
6764
6765                     /* Check for overlapping surfaces. */
6766                     if (src_surface != dst_surface || xdst.top < xsrc.top
6767                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6768                     {
6769                         /* No overlap, or dst above src, so copy from top downwards. */
6770                         for (y = 0; y < dstheight; ++y)
6771                         {
6772                             memcpy(dbuf, sbuf, width);
6773                             sbuf += slock.Pitch;
6774                             dbuf += dlock.Pitch;
6775                         }
6776                     }
6777                     else if (xdst.top > xsrc.top)
6778                     {
6779                         /* Copy from bottom upwards. */
6780                         sbuf += (slock.Pitch*dstheight);
6781                         dbuf += (dlock.Pitch*dstheight);
6782                         for (y = 0; y < dstheight; ++y)
6783                         {
6784                             sbuf -= slock.Pitch;
6785                             dbuf -= dlock.Pitch;
6786                             memcpy(dbuf, sbuf, width);
6787                         }
6788                     }
6789                     else
6790                     {
6791                         /* Src and dst overlapping on the same line, use memmove. */
6792                         for (y = 0; y < dstheight; ++y)
6793                         {
6794                             memmove(dbuf, sbuf, width);
6795                             sbuf += slock.Pitch;
6796                             dbuf += dlock.Pitch;
6797                         }
6798                     }
6799                 }
6800                 else
6801                 {
6802                     /* Stretching in y direction only. */
6803                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6804                     {
6805                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6806                         memcpy(dbuf, sbuf, width);
6807                         dbuf += dlock.Pitch;
6808                     }
6809                 }
6810             }
6811             else
6812             {
6813                 /* Stretching in X direction. */
6814                 int last_sy = -1;
6815                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6816                 {
6817                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6818
6819                     if ((sy >> 16) == (last_sy >> 16))
6820                     {
6821                         /* This source row is the same as last source row -
6822                          * Copy the already stretched row. */
6823                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6824                     }
6825                     else
6826                     {
6827 #define STRETCH_ROW(type) \
6828 do { \
6829     const type *s = (const type *)sbuf; \
6830     type *d = (type *)dbuf; \
6831     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6832         d[x] = s[sx >> 16]; \
6833 } while(0)
6834
6835                         switch(bpp)
6836                         {
6837                             case 1:
6838                                 STRETCH_ROW(BYTE);
6839                                 break;
6840                             case 2:
6841                                 STRETCH_ROW(WORD);
6842                                 break;
6843                             case 4:
6844                                 STRETCH_ROW(DWORD);
6845                                 break;
6846                             case 3:
6847                             {
6848                                 const BYTE *s;
6849                                 BYTE *d = dbuf;
6850                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6851                                 {
6852                                     DWORD pixel;
6853
6854                                     s = sbuf + 3 * (sx >> 16);
6855                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6856                                     d[0] = (pixel      ) & 0xff;
6857                                     d[1] = (pixel >>  8) & 0xff;
6858                                     d[2] = (pixel >> 16) & 0xff;
6859                                     d += 3;
6860                                 }
6861                                 break;
6862                             }
6863                             default:
6864                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6865                                 hr = WINED3DERR_NOTAVAILABLE;
6866                                 goto error;
6867                         }
6868 #undef STRETCH_ROW
6869                     }
6870                     dbuf += dlock.Pitch;
6871                     last_sy = sy;
6872                 }
6873             }
6874         }
6875         else
6876         {
6877             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6878             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6879             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6880             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6881             {
6882                 /* The color keying flags are checked for correctness in ddraw */
6883                 if (flags & WINEDDBLT_KEYSRC)
6884                 {
6885                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6886                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6887                 }
6888                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6889                 {
6890                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6891                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6892                 }
6893
6894                 if (flags & WINEDDBLT_KEYDEST)
6895                 {
6896                     /* Destination color keys are taken from the source surface! */
6897                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6898                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6899                 }
6900                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6901                 {
6902                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6903                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6904                 }
6905
6906                 if (bpp == 1)
6907                 {
6908                     keymask = 0xff;
6909                 }
6910                 else
6911                 {
6912                     keymask = src_format->red_mask
6913                             | src_format->green_mask
6914                             | src_format->blue_mask;
6915                 }
6916                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6917             }
6918
6919             if (flags & WINEDDBLT_DDFX)
6920             {
6921                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6922                 LONG tmpxy;
6923                 dTopLeft     = dbuf;
6924                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6925                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6926                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6927
6928                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6929                 {
6930                     /* I don't think we need to do anything about this flag */
6931                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6932                 }
6933                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6934                 {
6935                     tmp          = dTopRight;
6936                     dTopRight    = dTopLeft;
6937                     dTopLeft     = tmp;
6938                     tmp          = dBottomRight;
6939                     dBottomRight = dBottomLeft;
6940                     dBottomLeft  = tmp;
6941                     dstxinc = dstxinc * -1;
6942                 }
6943                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6944                 {
6945                     tmp          = dTopLeft;
6946                     dTopLeft     = dBottomLeft;
6947                     dBottomLeft  = tmp;
6948                     tmp          = dTopRight;
6949                     dTopRight    = dBottomRight;
6950                     dBottomRight = tmp;
6951                     dstyinc = dstyinc * -1;
6952                 }
6953                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6954                 {
6955                     /* I don't think we need to do anything about this flag */
6956                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6957                 }
6958                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6959                 {
6960                     tmp          = dBottomRight;
6961                     dBottomRight = dTopLeft;
6962                     dTopLeft     = tmp;
6963                     tmp          = dBottomLeft;
6964                     dBottomLeft  = dTopRight;
6965                     dTopRight    = tmp;
6966                     dstxinc = dstxinc * -1;
6967                     dstyinc = dstyinc * -1;
6968                 }
6969                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6970                 {
6971                     tmp          = dTopLeft;
6972                     dTopLeft     = dBottomLeft;
6973                     dBottomLeft  = dBottomRight;
6974                     dBottomRight = dTopRight;
6975                     dTopRight    = tmp;
6976                     tmpxy   = dstxinc;
6977                     dstxinc = dstyinc;
6978                     dstyinc = tmpxy;
6979                     dstxinc = dstxinc * -1;
6980                 }
6981                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6982                 {
6983                     tmp          = dTopLeft;
6984                     dTopLeft     = dTopRight;
6985                     dTopRight    = dBottomRight;
6986                     dBottomRight = dBottomLeft;
6987                     dBottomLeft  = tmp;
6988                     tmpxy   = dstxinc;
6989                     dstxinc = dstyinc;
6990                     dstyinc = tmpxy;
6991                     dstyinc = dstyinc * -1;
6992                 }
6993                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6994                 {
6995                     /* I don't think we need to do anything about this flag */
6996                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6997                 }
6998                 dbuf = dTopLeft;
6999                 flags &= ~(WINEDDBLT_DDFX);
7000             }
7001
7002 #define COPY_COLORKEY_FX(type) \
7003 do { \
7004     const type *s; \
7005     type *d = (type *)dbuf, *dx, tmp; \
7006     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7007     { \
7008         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
7009         dx = d; \
7010         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7011         { \
7012             tmp = s[sx >> 16]; \
7013             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7014                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7015             { \
7016                 dx[0] = tmp; \
7017             } \
7018             dx = (type *)(((BYTE *)dx) + dstxinc); \
7019         } \
7020         d = (type *)(((BYTE *)d) + dstyinc); \
7021     } \
7022 } while(0)
7023
7024             switch (bpp)
7025             {
7026                 case 1:
7027                     COPY_COLORKEY_FX(BYTE);
7028                     break;
7029                 case 2:
7030                     COPY_COLORKEY_FX(WORD);
7031                     break;
7032                 case 4:
7033                     COPY_COLORKEY_FX(DWORD);
7034                     break;
7035                 case 3:
7036                 {
7037                     const BYTE *s;
7038                     BYTE *d = dbuf, *dx;
7039                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7040                     {
7041                         sbuf = sbase + (sy >> 16) * slock.Pitch;
7042                         dx = d;
7043                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7044                         {
7045                             DWORD pixel, dpixel = 0;
7046                             s = sbuf + 3 * (sx>>16);
7047                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7048                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7049                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7050                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7051                             {
7052                                 dx[0] = (pixel      ) & 0xff;
7053                                 dx[1] = (pixel >>  8) & 0xff;
7054                                 dx[2] = (pixel >> 16) & 0xff;
7055                             }
7056                             dx += dstxinc;
7057                         }
7058                         d += dstyinc;
7059                     }
7060                     break;
7061                 }
7062                 default:
7063                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7064                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7065                     hr = WINED3DERR_NOTAVAILABLE;
7066                     goto error;
7067 #undef COPY_COLORKEY_FX
7068             }
7069         }
7070     }
7071
7072 error:
7073     if (flags && FIXME_ON(d3d_surface))
7074     {
7075         FIXME("\tUnsupported flags: %#x.\n", flags);
7076     }
7077
7078 release:
7079     wined3d_surface_unmap(dst_surface);
7080     if (src_surface && src_surface != dst_surface)
7081         wined3d_surface_unmap(src_surface);
7082     /* Release the converted surface, if any. */
7083     if (src_surface && src_surface != orig_src)
7084         wined3d_surface_decref(src_surface);
7085
7086     return hr;
7087 }
7088
7089 static HRESULT surface_cpu_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
7090         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
7091 {
7092     const struct wined3d_format *src_format, *dst_format;
7093     RECT lock_src, lock_dst, lock_union;
7094     WINED3DLOCKED_RECT dlock, slock;
7095     HRESULT hr = WINED3D_OK;
7096     int bpp, w, h, x, y;
7097     const BYTE *sbuf;
7098     BYTE *dbuf;
7099     RECT rsrc2;
7100
7101     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
7102             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
7103
7104     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface->flags & SFLAG_LOCKED))
7105     {
7106         WARN(" Surface is busy, returning DDERR_SURFACEBUSY\n");
7107         return WINEDDERR_SURFACEBUSY;
7108     }
7109
7110     if (!src_rect)
7111     {
7112         WARN("src_rect is NULL!\n");
7113         rsrc2.left = 0;
7114         rsrc2.top = 0;
7115         rsrc2.right = src_surface->resource.width;
7116         rsrc2.bottom = src_surface->resource.height;
7117         src_rect = &rsrc2;
7118     }
7119
7120     /* Check source rect for validity. Copied from normal Blt. Fixes Baldur's Gate. */
7121     if ((src_rect->bottom > src_surface->resource.height) || (src_rect->bottom < 0)
7122             || (src_rect->top > src_surface->resource.height) || (src_rect->top < 0)
7123             || (src_rect->left > src_surface->resource.width) || (src_rect->left < 0)
7124             || (src_rect->right > src_surface->resource.width) || (src_rect->right < 0)
7125             || (src_rect->right < src_rect->left) || (src_rect->bottom < src_rect->top))
7126     {
7127         WARN("Application gave us bad source rectangle for BltFast.\n");
7128         return WINEDDERR_INVALIDRECT;
7129     }
7130
7131     h = src_rect->bottom - src_rect->top;
7132     if (h > dst_surface->resource.height - dst_y)
7133         h = dst_surface->resource.height - dst_y;
7134     if (h > src_surface->resource.height - src_rect->top)
7135         h = src_surface->resource.height - src_rect->top;
7136     if (h <= 0)
7137         return WINEDDERR_INVALIDRECT;
7138
7139     w = src_rect->right - src_rect->left;
7140     if (w > dst_surface->resource.width - dst_x)
7141         w = dst_surface->resource.width - dst_x;
7142     if (w > src_surface->resource.width - src_rect->left)
7143         w = src_surface->resource.width - src_rect->left;
7144     if (w <= 0)
7145         return WINEDDERR_INVALIDRECT;
7146
7147     /* Now compute the locking rectangle... */
7148     lock_src.left = src_rect->left;
7149     lock_src.top = src_rect->top;
7150     lock_src.right = lock_src.left + w;
7151     lock_src.bottom = lock_src.top + h;
7152
7153     lock_dst.left = dst_x;
7154     lock_dst.top = dst_y;
7155     lock_dst.right = dst_x + w;
7156     lock_dst.bottom = dst_y + h;
7157
7158     bpp = dst_surface->resource.format->byte_count;
7159
7160     /* We need to lock the surfaces, or we won't get refreshes when done. */
7161     if (src_surface == dst_surface)
7162     {
7163         int pitch;
7164
7165         UnionRect(&lock_union, &lock_src, &lock_dst);
7166
7167         /* Lock the union of the two rectangles. */
7168         hr = wined3d_surface_map(dst_surface, &dlock, &lock_union, 0);
7169         if (FAILED(hr))
7170             goto error;
7171
7172         pitch = dlock.Pitch;
7173         slock.Pitch = dlock.Pitch;
7174
7175         /* Since slock was originally copied from this surface's description, we can just reuse it. */
7176         sbuf = dst_surface->resource.allocatedMemory + lock_src.top * pitch + lock_src.left * bpp;
7177         dbuf = dst_surface->resource.allocatedMemory + lock_dst.top * pitch + lock_dst.left * bpp;
7178         src_format = src_surface->resource.format;
7179         dst_format = src_format;
7180     }
7181     else
7182     {
7183         hr = wined3d_surface_map(src_surface, &slock, &lock_src, WINED3DLOCK_READONLY);
7184         if (FAILED(hr))
7185             goto error;
7186         hr = wined3d_surface_map(dst_surface, &dlock, &lock_dst, 0);
7187         if (FAILED(hr))
7188             goto error;
7189
7190         sbuf = slock.pBits;
7191         dbuf = dlock.pBits;
7192         TRACE("Dst is at %p, Src is at %p.\n", dbuf, sbuf);
7193
7194         src_format = src_surface->resource.format;
7195         dst_format = dst_surface->resource.format;
7196     }
7197
7198     /* Handle compressed surfaces first... */
7199     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
7200     {
7201         UINT row_block_count;
7202
7203         TRACE("compressed -> compressed copy\n");
7204         if (trans)
7205             FIXME("trans arg not supported when a compressed surface is involved\n");
7206         if (dst_x || dst_y)
7207             FIXME("offset for destination surface is not supported\n");
7208         if (src_surface->resource.format->id != dst_surface->resource.format->id)
7209         {
7210             FIXME("compressed -> compressed copy only supported for the same type of surface\n");
7211             hr = WINED3DERR_WRONGTEXTUREFORMAT;
7212             goto error;
7213         }
7214
7215         row_block_count = (w + dst_format->block_width - 1) / dst_format->block_width;
7216         for (y = 0; y < h; y += dst_format->block_height)
7217         {
7218             memcpy(dbuf, sbuf, row_block_count * dst_format->block_byte_count);
7219             dbuf += dlock.Pitch;
7220             sbuf += slock.Pitch;
7221         }
7222
7223         goto error;
7224     }
7225     if ((src_format->flags & WINED3DFMT_FLAG_COMPRESSED) && !(dst_format->flags & WINED3DFMT_FLAG_COMPRESSED))
7226     {
7227         /* TODO: Use the libtxc_dxtn.so shared library to do software
7228          * decompression. */
7229         ERR("Software decompression not supported.\n");
7230         goto error;
7231     }
7232
7233     if (trans & (WINEDDBLTFAST_SRCCOLORKEY | WINEDDBLTFAST_DESTCOLORKEY))
7234     {
7235         DWORD keylow, keyhigh;
7236         DWORD mask = src_surface->resource.format->red_mask
7237                 | src_surface->resource.format->green_mask
7238                 | src_surface->resource.format->blue_mask;
7239
7240         /* For some 8-bit formats like L8 and P8 color masks don't make sense */
7241         if (!mask && bpp == 1)
7242             mask = 0xff;
7243
7244         TRACE("Color keyed copy.\n");
7245         if (trans & WINEDDBLTFAST_SRCCOLORKEY)
7246         {
7247             keylow = src_surface->SrcBltCKey.dwColorSpaceLowValue;
7248             keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
7249         }
7250         else
7251         {
7252             /* I'm not sure if this is correct. */
7253             FIXME("WINEDDBLTFAST_DESTCOLORKEY not fully supported yet.\n");
7254             keylow = dst_surface->DestBltCKey.dwColorSpaceLowValue;
7255             keyhigh = dst_surface->DestBltCKey.dwColorSpaceHighValue;
7256         }
7257
7258 #define COPYBOX_COLORKEY(type) \
7259 do { \
7260     const type *s = (const type *)sbuf; \
7261     type *d = (type *)dbuf; \
7262     type tmp; \
7263     for (y = 0; y < h; y++) \
7264     { \
7265         for (x = 0; x < w; x++) \
7266         { \
7267             tmp = s[x]; \
7268             if ((tmp & mask) < keylow || (tmp & mask) > keyhigh) d[x] = tmp; \
7269         } \
7270         s = (const type *)((const BYTE *)s + slock.Pitch); \
7271         d = (type *)((BYTE *)d + dlock.Pitch); \
7272     } \
7273 } while(0)
7274
7275         switch (bpp)
7276         {
7277             case 1:
7278                 COPYBOX_COLORKEY(BYTE);
7279                 break;
7280             case 2:
7281                 COPYBOX_COLORKEY(WORD);
7282                 break;
7283             case 4:
7284                 COPYBOX_COLORKEY(DWORD);
7285                 break;
7286             case 3:
7287             {
7288                 const BYTE *s;
7289                 DWORD tmp;
7290                 BYTE *d;
7291                 s = sbuf;
7292                 d = dbuf;
7293                 for (y = 0; y < h; ++y)
7294                 {
7295                     for (x = 0; x < w * 3; x += 3)
7296                     {
7297                         tmp = (DWORD)s[x] + ((DWORD)s[x + 1] << 8) + ((DWORD)s[x + 2] << 16);
7298                         if (tmp < keylow || tmp > keyhigh)
7299                         {
7300                             d[x + 0] = s[x + 0];
7301                             d[x + 1] = s[x + 1];
7302                             d[x + 2] = s[x + 2];
7303                         }
7304                     }
7305                     s += slock.Pitch;
7306                     d += dlock.Pitch;
7307                 }
7308                 break;
7309             }
7310             default:
7311                 FIXME("Source color key blitting not supported for bpp %u.\n", bpp * 8);
7312                 hr = WINED3DERR_NOTAVAILABLE;
7313                 goto error;
7314         }
7315 #undef COPYBOX_COLORKEY
7316         TRACE("Copy done.\n");
7317     }
7318     else
7319     {
7320         int width = w * bpp;
7321         INT sbufpitch, dbufpitch;
7322
7323         TRACE("No color key copy.\n");
7324         /* Handle overlapping surfaces. */
7325         if (sbuf < dbuf)
7326         {
7327             sbuf += (h - 1) * slock.Pitch;
7328             dbuf += (h - 1) * dlock.Pitch;
7329             sbufpitch = -slock.Pitch;
7330             dbufpitch = -dlock.Pitch;
7331         }
7332         else
7333         {
7334             sbufpitch = slock.Pitch;
7335             dbufpitch = dlock.Pitch;
7336         }
7337         for (y = 0; y < h; ++y)
7338         {
7339             /* This is pretty easy, a line for line memcpy. */
7340             memmove(dbuf, sbuf, width);
7341             sbuf += sbufpitch;
7342             dbuf += dbufpitch;
7343         }
7344         TRACE("Copy done.\n");
7345     }
7346
7347 error:
7348     if (src_surface == dst_surface)
7349     {
7350         wined3d_surface_unmap(dst_surface);
7351     }
7352     else
7353     {
7354         wined3d_surface_unmap(dst_surface);
7355         wined3d_surface_unmap(src_surface);
7356     }
7357
7358     return hr;
7359 }
7360
7361 /* Do not call while under the GL lock. */
7362 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7363         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7364 {
7365     WINEDDBLTFX BltFx;
7366
7367     memset(&BltFx, 0, sizeof(BltFx));
7368     BltFx.dwSize = sizeof(BltFx);
7369     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface->resource.format, color);
7370     return wined3d_surface_blt(dst_surface, dst_rect, NULL, NULL,
7371             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7372 }
7373
7374 /* Do not call while under the GL lock. */
7375 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7376         struct wined3d_surface *surface, const RECT *rect, float depth)
7377 {
7378     FIXME("Depth filling not implemented by cpu_blit.\n");
7379     return WINED3DERR_INVALIDCALL;
7380 }
7381
/* Table of the cpu_blit_* functions making up the CPU blitter.
 * The initializer is positional, so the entries have to stay in the member
 * order of struct blit_shader (declared outside this part of the file). */
7382 const struct blit_shader cpu_blit =  {
7383     cpu_blit_alloc,
7384     cpu_blit_free,
7385     cpu_blit_set,
7386     cpu_blit_unset,
7387     cpu_blit_supported,
7388     cpu_blit_color_fill, /* Colour fill, done through wined3d_surface_blt(). */
7389     cpu_blit_depth_fill, /* Depth fill, unimplemented: always WINED3DERR_INVALIDCALL. */
7390 };
7391
7392 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7393         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7394         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7395         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7396 {
7397     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7398     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7399     unsigned int resource_size;
7400     HRESULT hr;
7401
7402     if (multisample_quality > 0)
7403     {
7404         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7405         multisample_quality = 0;
7406     }
7407
7408     /* Quick lockable sanity check.
7409      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7410      * this function is too deep to need to care about things like this.
7411      * Levels need to be checked too, since they all affect what can be done. */
7412     switch (pool)
7413     {
7414         case WINED3DPOOL_SCRATCH:
7415             if (!lockable)
7416             {
7417                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7418                         "which are mutually exclusive, setting lockable to TRUE.\n");
7419                 lockable = TRUE;
7420             }
7421             break;
7422
7423         case WINED3DPOOL_SYSTEMMEM:
7424             if (!lockable)
7425                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7426             break;
7427
7428         case WINED3DPOOL_MANAGED:
7429             if (usage & WINED3DUSAGE_DYNAMIC)
7430                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7431             break;
7432
7433         case WINED3DPOOL_DEFAULT:
7434             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7435                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7436             break;
7437
7438         default:
7439             FIXME("Unknown pool %#x.\n", pool);
7440             break;
7441     };
7442
7443     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7444         FIXME("Trying to create a render target that isn't in the default pool.\n");
7445
7446     /* FIXME: Check that the format is supported by the device. */
7447
7448     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7449     if (!resource_size)
7450         return WINED3DERR_INVALIDCALL;
7451
7452     surface->surface_type = surface_type;
7453
7454     switch (surface_type)
7455     {
7456         case SURFACE_OPENGL:
7457             surface->surface_ops = &surface_ops;
7458             break;
7459
7460         case SURFACE_GDI:
7461             surface->surface_ops = &gdi_surface_ops;
7462             break;
7463
7464         default:
7465             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7466             return WINED3DERR_INVALIDCALL;
7467     }
7468
7469     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7470             multisample_type, multisample_quality, usage, pool, width, height, 1,
7471             resource_size, parent, parent_ops, &surface_resource_ops);
7472     if (FAILED(hr))
7473     {
7474         WARN("Failed to initialize resource, returning %#x.\n", hr);
7475         return hr;
7476     }
7477
7478     /* "Standalone" surface. */
7479     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7480
7481     surface->texture_level = level;
7482     list_init(&surface->overlays);
7483
7484     /* Flags */
7485     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7486     if (discard)
7487         surface->flags |= SFLAG_DISCARD;
7488     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7489         surface->flags |= SFLAG_LOCKABLE;
7490     /* I'm not sure if this qualifies as a hack or as an optimization. It
7491      * seems reasonable to assume that lockable render targets will get
7492      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7493      * creation. However, the other reason we want to do this is that several
7494      * ddraw applications access surface memory while the surface isn't
7495      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7496      * future locks prevents these from crashing. */
7497     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7498         surface->flags |= SFLAG_DYNLOCK;
7499
7500     /* Mark the texture as dirty so that it gets loaded first time around. */
7501     surface_add_dirty_rect(surface, NULL);
7502     list_init(&surface->renderbuffers);
7503
7504     TRACE("surface %p, memory %p, size %u\n",
7505             surface, surface->resource.allocatedMemory, surface->resource.size);
7506
7507     /* Call the private setup routine */
7508     hr = surface->surface_ops->surface_private_setup(surface);
7509     if (FAILED(hr))
7510     {
7511         ERR("Private setup failed, returning %#x\n", hr);
7512         surface->surface_ops->surface_cleanup(surface);
7513         return hr;
7514     }
7515
7516     return hr;
7517 }
7518
7519 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7520         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7521         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7522         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7523 {
7524     struct wined3d_surface *object;
7525     HRESULT hr;
7526
7527     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7528             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7529     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7530             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7531     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7532
7533     if (surface_type == SURFACE_OPENGL && !device->adapter)
7534     {
7535         ERR("OpenGL surfaces are not available without OpenGL.\n");
7536         return WINED3DERR_NOTAVAILABLE;
7537     }
7538
7539     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7540     if (!object)
7541     {
7542         ERR("Failed to allocate surface memory.\n");
7543         return WINED3DERR_OUTOFVIDEOMEMORY;
7544     }
7545
7546     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7547             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7548     if (FAILED(hr))
7549     {
7550         WARN("Failed to initialize surface, returning %#x.\n", hr);
7551         HeapFree(GetProcessHeap(), 0, object);
7552         return hr;
7553     }
7554
7555     TRACE("Created surface %p.\n", object);
7556     *surface = object;
7557
7558     return hr;
7559 }