wined3d: Print an ERR for unknown / invalid surface locations in surface_load_location().
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
/* Forward declarations for the file-local blit helpers. They are static, so
 * their definitions live elsewhere in this translation unit. */

/* CPU blit from src_rect of src_surface to dst_rect of dst_surface. */
static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
        const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
/* CPU "fast" blit of src_rect to position (dst_x, dst_y) on dst_surface. */
static HRESULT surface_cpu_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans);
/* NOTE(review): presumably the GL-accelerated blit path, judging by the
 * name only - confirm against the definition. */
static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
        WINED3DTEXTUREFILTERTYPE filter);
44
45 static void surface_cleanup(struct wined3d_surface *surface)
46 {
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO) || !list_empty(&surface->renderbuffers))
50     {
51         struct wined3d_renderbuffer_entry *entry, *entry2;
52         const struct wined3d_gl_info *gl_info;
53         struct wined3d_context *context;
54
55         context = context_acquire(surface->resource.device, NULL);
56         gl_info = context->gl_info;
57
58         ENTER_GL();
59
60         if (surface->texture_name)
61         {
62             TRACE("Deleting texture %u.\n", surface->texture_name);
63             glDeleteTextures(1, &surface->texture_name);
64         }
65
66         if (surface->flags & SFLAG_PBO)
67         {
68             TRACE("Deleting PBO %u.\n", surface->pbo);
69             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
70         }
71
72         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
73         {
74             TRACE("Deleting renderbuffer %u.\n", entry->id);
75             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
76             HeapFree(GetProcessHeap(), 0, entry);
77         }
78
79         LEAVE_GL();
80
81         context_release(context);
82     }
83
84     if (surface->flags & SFLAG_DIBSECTION)
85     {
86         /* Release the DC. */
87         SelectObject(surface->hDC, surface->dib.holdbitmap);
88         DeleteDC(surface->hDC);
89         /* Release the DIB section. */
90         DeleteObject(surface->dib.DIBsection);
91         surface->dib.bitmap_data = NULL;
92         surface->resource.allocatedMemory = NULL;
93     }
94
95     if (surface->flags & SFLAG_USERPTR)
96         wined3d_surface_set_mem(surface, NULL);
97     if (surface->overlay_dest)
98         list_remove(&surface->overlay_entry);
99
100     HeapFree(GetProcessHeap(), 0, surface->palette9);
101
102     resource_cleanup(&surface->resource);
103 }
104
105 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
106 {
107     TRACE("surface %p, container %p.\n", surface, container);
108
109     if (!container && type != WINED3D_CONTAINER_NONE)
110         ERR("Setting NULL container of type %#x.\n", type);
111
112     if (type == WINED3D_CONTAINER_SWAPCHAIN)
113     {
114         surface->get_drawable_size = get_drawable_size_swapchain;
115     }
116     else
117     {
118         switch (wined3d_settings.offscreen_rendering_mode)
119         {
120             case ORM_FBO:
121                 surface->get_drawable_size = get_drawable_size_fbo;
122                 break;
123
124             case ORM_BACKBUFFER:
125                 surface->get_drawable_size = get_drawable_size_backbuffer;
126                 break;
127
128             default:
129                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
130                 return;
131         }
132     }
133
134     surface->container.type = type;
135     surface->container.u.base = container;
136 }
137
/* Texture setup for drawing a textured quad from a surface, as filled in by
 * surface_get_blt_info(). */
struct blt_info
{
    GLenum binding;         /* GL_TEXTURE_BINDING_* enum matching bind_target. */
    GLenum bind_target;     /* Target passed to glEnable() / glBindTexture(). */
    enum tex_types tex_type; /* tex_2d, tex_rect or tex_cube. */
    GLfloat coords[4][3];   /* One 3-component texture coordinate per quad corner. */
};
145
/* Rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
153
154 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
155 {
156     f->l = ((r->left * 2.0f) / w) - 1.0f;
157     f->t = ((r->top * 2.0f) / h) - 1.0f;
158     f->r = ((r->right * 2.0f) / w) - 1.0f;
159     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
160 }
161
162 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
163 {
164     GLfloat (*coords)[3] = info->coords;
165     struct float_rect f;
166
167     switch (target)
168     {
169         default:
170             FIXME("Unsupported texture target %#x\n", target);
171             /* Fall back to GL_TEXTURE_2D */
172         case GL_TEXTURE_2D:
173             info->binding = GL_TEXTURE_BINDING_2D;
174             info->bind_target = GL_TEXTURE_2D;
175             info->tex_type = tex_2d;
176             coords[0][0] = (float)rect->left / w;
177             coords[0][1] = (float)rect->top / h;
178             coords[0][2] = 0.0f;
179
180             coords[1][0] = (float)rect->right / w;
181             coords[1][1] = (float)rect->top / h;
182             coords[1][2] = 0.0f;
183
184             coords[2][0] = (float)rect->left / w;
185             coords[2][1] = (float)rect->bottom / h;
186             coords[2][2] = 0.0f;
187
188             coords[3][0] = (float)rect->right / w;
189             coords[3][1] = (float)rect->bottom / h;
190             coords[3][2] = 0.0f;
191             break;
192
193         case GL_TEXTURE_RECTANGLE_ARB:
194             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
195             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
196             info->tex_type = tex_rect;
197             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
198             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
199             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
200             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
201             break;
202
203         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
204             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
205             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
206             info->tex_type = tex_cube;
207             cube_coords_float(rect, w, h, &f);
208
209             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
210             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
211             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
212             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
213             break;
214
215         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
216             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
217             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
218             info->tex_type = tex_cube;
219             cube_coords_float(rect, w, h, &f);
220
221             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
222             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
223             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
224             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
225             break;
226
227         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
228             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
229             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
230             info->tex_type = tex_cube;
231             cube_coords_float(rect, w, h, &f);
232
233             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
234             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
235             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
236             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
237             break;
238
239         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
240             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
241             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
242             info->tex_type = tex_cube;
243             cube_coords_float(rect, w, h, &f);
244
245             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
246             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
247             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
248             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
249             break;
250
251         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
252             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
253             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
254             info->tex_type = tex_cube;
255             cube_coords_float(rect, w, h, &f);
256
257             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
258             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
259             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
260             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
261             break;
262
263         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
264             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
265             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
266             info->tex_type = tex_cube;
267             cube_coords_float(rect, w, h, &f);
268
269             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
270             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
271             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
272             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
273             break;
274     }
275 }
276
277 static inline void surface_get_rect(struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
278 {
279     if (rect_in)
280         *rect_out = *rect_in;
281     else
282     {
283         rect_out->left = 0;
284         rect_out->top = 0;
285         rect_out->right = surface->resource.width;
286         rect_out->bottom = surface->resource.height;
287     }
288 }
289
290 /* GL locking and context activation is done by the caller */
291 void draw_textured_quad(struct wined3d_surface *src_surface, const RECT *src_rect,
292         const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
293 {
294     struct blt_info info;
295
296     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
297
298     glEnable(info.bind_target);
299     checkGLcall("glEnable(bind_target)");
300
301     /* Bind the texture */
302     glBindTexture(info.bind_target, src_surface->texture_name);
303     checkGLcall("glBindTexture");
304
305     /* Filtering for StretchRect */
306     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
307             wined3d_gl_mag_filter(magLookup, Filter));
308     checkGLcall("glTexParameteri");
309     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
310             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
311     checkGLcall("glTexParameteri");
312     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
313     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
314     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
315     checkGLcall("glTexEnvi");
316
317     /* Draw a quad */
318     glBegin(GL_TRIANGLE_STRIP);
319     glTexCoord3fv(info.coords[0]);
320     glVertex2i(dst_rect->left, dst_rect->top);
321
322     glTexCoord3fv(info.coords[1]);
323     glVertex2i(dst_rect->right, dst_rect->top);
324
325     glTexCoord3fv(info.coords[2]);
326     glVertex2i(dst_rect->left, dst_rect->bottom);
327
328     glTexCoord3fv(info.coords[3]);
329     glVertex2i(dst_rect->right, dst_rect->bottom);
330     glEnd();
331
332     /* Unbind the texture */
333     glBindTexture(info.bind_target, 0);
334     checkGLcall("glBindTexture(info->bind_target, 0)");
335
336     /* We changed the filtering settings on the texture. Inform the
337      * container about this to get the filters reset properly next draw. */
338     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
339     {
340         struct wined3d_texture *texture = src_surface->container.u.texture;
341         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
342         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
343         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
344     }
345 }
346
347 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
348 {
349     const struct wined3d_format *format = surface->resource.format;
350     SYSTEM_INFO sysInfo;
351     BITMAPINFO *b_info;
352     int extraline = 0;
353     DWORD *masks;
354     UINT usage;
355     HDC dc;
356
357     TRACE("surface %p.\n", surface);
358
359     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
360     {
361         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
362         return WINED3DERR_INVALIDCALL;
363     }
364
365     switch (format->byte_count)
366     {
367         case 2:
368         case 4:
369             /* Allocate extra space to store the RGB bit masks. */
370             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
371             break;
372
373         case 3:
374             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
375             break;
376
377         default:
378             /* Allocate extra space for a palette. */
379             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
380                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
381             break;
382     }
383
384     if (!b_info)
385         return E_OUTOFMEMORY;
386
387     /* Some applications access the surface in via DWORDs, and do not take
388      * the necessary care at the end of the surface. So we need at least
389      * 4 extra bytes at the end of the surface. Check against the page size,
390      * if the last page used for the surface has at least 4 spare bytes we're
391      * safe, otherwise add an extra line to the DIB section. */
392     GetSystemInfo(&sysInfo);
393     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
394     {
395         extraline = 1;
396         TRACE("Adding an extra line to the DIB section.\n");
397     }
398
399     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
400     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
401     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
402     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
403     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
404             * wined3d_surface_get_pitch(surface);
405     b_info->bmiHeader.biPlanes = 1;
406     b_info->bmiHeader.biBitCount = format->byte_count * 8;
407
408     b_info->bmiHeader.biXPelsPerMeter = 0;
409     b_info->bmiHeader.biYPelsPerMeter = 0;
410     b_info->bmiHeader.biClrUsed = 0;
411     b_info->bmiHeader.biClrImportant = 0;
412
413     /* Get the bit masks */
414     masks = (DWORD *)b_info->bmiColors;
415     switch (surface->resource.format->id)
416     {
417         case WINED3DFMT_B8G8R8_UNORM:
418             usage = DIB_RGB_COLORS;
419             b_info->bmiHeader.biCompression = BI_RGB;
420             break;
421
422         case WINED3DFMT_B5G5R5X1_UNORM:
423         case WINED3DFMT_B5G5R5A1_UNORM:
424         case WINED3DFMT_B4G4R4A4_UNORM:
425         case WINED3DFMT_B4G4R4X4_UNORM:
426         case WINED3DFMT_B2G3R3_UNORM:
427         case WINED3DFMT_B2G3R3A8_UNORM:
428         case WINED3DFMT_R10G10B10A2_UNORM:
429         case WINED3DFMT_R8G8B8A8_UNORM:
430         case WINED3DFMT_R8G8B8X8_UNORM:
431         case WINED3DFMT_B10G10R10A2_UNORM:
432         case WINED3DFMT_B5G6R5_UNORM:
433         case WINED3DFMT_R16G16B16A16_UNORM:
434             usage = 0;
435             b_info->bmiHeader.biCompression = BI_BITFIELDS;
436             masks[0] = format->red_mask;
437             masks[1] = format->green_mask;
438             masks[2] = format->blue_mask;
439             break;
440
441         default:
442             /* Don't know palette */
443             b_info->bmiHeader.biCompression = BI_RGB;
444             usage = 0;
445             break;
446     }
447
448     if (!(dc = GetDC(0)))
449     {
450         HeapFree(GetProcessHeap(), 0, b_info);
451         return HRESULT_FROM_WIN32(GetLastError());
452     }
453
454     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
455             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
456             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
457     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
458     ReleaseDC(0, dc);
459
460     if (!surface->dib.DIBsection)
461     {
462         ERR("Failed to create DIB section.\n");
463         HeapFree(GetProcessHeap(), 0, b_info);
464         return HRESULT_FROM_WIN32(GetLastError());
465     }
466
467     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
468     /* Copy the existing surface to the dib section. */
469     if (surface->resource.allocatedMemory)
470     {
471         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
472                 surface->resource.height * wined3d_surface_get_pitch(surface));
473     }
474     else
475     {
476         /* This is to make maps read the GL texture although memory is allocated. */
477         surface->flags &= ~SFLAG_INSYSMEM;
478     }
479     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
480
481     HeapFree(GetProcessHeap(), 0, b_info);
482
483     /* Now allocate a DC. */
484     surface->hDC = CreateCompatibleDC(0);
485     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
486     TRACE("Using wined3d palette %p.\n", surface->palette);
487     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
488
489     surface->flags |= SFLAG_DIBSECTION;
490
491     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
492     surface->resource.heapMemory = NULL;
493
494     return WINED3D_OK;
495 }
496
497 static void surface_prepare_system_memory(struct wined3d_surface *surface)
498 {
499     struct wined3d_device *device = surface->resource.device;
500     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
501
502     TRACE("surface %p.\n", surface);
503
504     /* Performance optimization: Count how often a surface is locked, if it is
505      * locked regularly do not throw away the system memory copy. This avoids
506      * the need to download the surface from OpenGL all the time. The surface
507      * is still downloaded if the OpenGL texture is changed. */
508     if (!(surface->flags & SFLAG_DYNLOCK))
509     {
510         if (++surface->lockCount > MAXLOCKCOUNT)
511         {
512             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
513             surface->flags |= SFLAG_DYNLOCK;
514         }
515     }
516
517     /* Create a PBO for dynamically locked surfaces but don't do it for
518      * converted or NPOT surfaces. Also don't create a PBO for systemmem
519      * surfaces. */
520     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
521             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
522             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
523     {
524         struct wined3d_context *context;
525         GLenum error;
526
527         context = context_acquire(device, NULL);
528         ENTER_GL();
529
530         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
531         error = glGetError();
532         if (!surface->pbo || error != GL_NO_ERROR)
533             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
534
535         TRACE("Binding PBO %u.\n", surface->pbo);
536
537         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
538         checkGLcall("glBindBufferARB");
539
540         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
541                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
542         checkGLcall("glBufferDataARB");
543
544         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
545         checkGLcall("glBindBufferARB");
546
547         /* We don't need the system memory anymore and we can't even use it for PBOs. */
548         if (!(surface->flags & SFLAG_CLIENT))
549         {
550             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
551             surface->resource.heapMemory = NULL;
552         }
553         surface->resource.allocatedMemory = NULL;
554         surface->flags |= SFLAG_PBO;
555         LEAVE_GL();
556         context_release(context);
557     }
558     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
559     {
560         /* Whatever surface we have, make sure that there is memory allocated
561          * for the downloaded copy, or a PBO to map. */
562         if (!surface->resource.heapMemory)
563             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
564
565         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
566                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
567
568         if (surface->flags & SFLAG_INSYSMEM)
569             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
570     }
571 }
572
573 static void surface_evict_sysmem(struct wined3d_surface *surface)
574 {
575     if (surface->flags & SFLAG_DONOTFREE)
576         return;
577
578     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
579     surface->resource.allocatedMemory = NULL;
580     surface->resource.heapMemory = NULL;
581     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
582 }
583
584 /* Context activation is done by the caller. */
585 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
586         const struct wined3d_gl_info *gl_info, BOOL srgb)
587 {
588     struct wined3d_device *device = surface->resource.device;
589     DWORD active_sampler;
590     GLint active_texture;
591
592     /* We don't need a specific texture unit, but after binding the texture
593      * the current unit is dirty. Read the unit back instead of switching to
594      * 0, this avoids messing around with the state manager's GL states. The
595      * current texture unit should always be a valid one.
596      *
597      * To be more specific, this is tricky because we can implicitly be
598      * called from sampler() in state.c. This means we can't touch anything
599      * other than whatever happens to be the currently active texture, or we
600      * would risk marking already applied sampler states dirty again.
601      *
602      * TODO: Track the current active texture per GL context instead of using
603      * glGet(). */
604
605     ENTER_GL();
606     glGetIntegerv(GL_ACTIVE_TEXTURE, &active_texture);
607     LEAVE_GL();
608     active_sampler = device->rev_tex_unit_map[active_texture - GL_TEXTURE0_ARB];
609
610     if (active_sampler != WINED3D_UNMAPPED_STAGE)
611         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
612     surface_bind(surface, gl_info, srgb);
613 }
614
615 static void surface_force_reload(struct wined3d_surface *surface)
616 {
617     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
618 }
619
620 static void surface_release_client_storage(struct wined3d_surface *surface)
621 {
622     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
623
624     ENTER_GL();
625     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
626     if (surface->texture_name)
627     {
628         surface_bind_and_dirtify(surface, context->gl_info, FALSE);
629         glTexImage2D(surface->texture_target, surface->texture_level,
630                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
631     }
632     if (surface->texture_name_srgb)
633     {
634         surface_bind_and_dirtify(surface, context->gl_info, TRUE);
635         glTexImage2D(surface->texture_target, surface->texture_level,
636                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
637     }
638     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
639     LEAVE_GL();
640
641     context_release(context);
642
643     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
644     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
645     surface_force_reload(surface);
646 }
647
648 static HRESULT surface_private_setup(struct wined3d_surface *surface)
649 {
650     /* TODO: Check against the maximum texture sizes supported by the video card. */
651     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
652     unsigned int pow2Width, pow2Height;
653
654     TRACE("surface %p.\n", surface);
655
656     surface->texture_name = 0;
657     surface->texture_target = GL_TEXTURE_2D;
658
659     /* Non-power2 support */
660     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
661     {
662         pow2Width = surface->resource.width;
663         pow2Height = surface->resource.height;
664     }
665     else
666     {
667         /* Find the nearest pow2 match */
668         pow2Width = pow2Height = 1;
669         while (pow2Width < surface->resource.width)
670             pow2Width <<= 1;
671         while (pow2Height < surface->resource.height)
672             pow2Height <<= 1;
673     }
674     surface->pow2Width = pow2Width;
675     surface->pow2Height = pow2Height;
676
677     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
678     {
679         /* TODO: Add support for non power two compressed textures. */
680         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
681         {
682             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
683                   surface, surface->resource.width, surface->resource.height);
684             return WINED3DERR_NOTAVAILABLE;
685         }
686     }
687
688     if (pow2Width != surface->resource.width
689             || pow2Height != surface->resource.height)
690     {
691         surface->flags |= SFLAG_NONPOW2;
692     }
693
694     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
695             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
696     {
697         /* One of three options:
698          * 1: Do the same as we do with NPOT and scale the texture, (any
699          *    texture ops would require the texture to be scaled which is
700          *    potentially slow)
701          * 2: Set the texture to the maximum size (bad idea).
702          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
703          * 4: Create the surface, but allow it to be used only for DirectDraw
704          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
705          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
706          *    the render target. */
707         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
708         {
709             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
710             return WINED3DERR_NOTAVAILABLE;
711         }
712
713         /* We should never use this surface in combination with OpenGL! */
714         TRACE("Creating an oversized surface: %ux%u.\n",
715                 surface->pow2Width, surface->pow2Height);
716     }
717     else
718     {
719         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
720          * and EXT_PALETTED_TEXTURE is used in combination with texture
721          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
722          * EXT_PALETTED_TEXTURE doesn't work in combination with
723          * ARB_TEXTURE_RECTANGLE. */
724         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
725                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
726                 && gl_info->supported[EXT_PALETTED_TEXTURE]
727                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
728         {
729             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
730             surface->pow2Width = surface->resource.width;
731             surface->pow2Height = surface->resource.height;
732             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
733         }
734     }
735
736     switch (wined3d_settings.offscreen_rendering_mode)
737     {
738         case ORM_FBO:
739             surface->get_drawable_size = get_drawable_size_fbo;
740             break;
741
742         case ORM_BACKBUFFER:
743             surface->get_drawable_size = get_drawable_size_backbuffer;
744             break;
745
746         default:
747             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
748             return WINED3DERR_INVALIDCALL;
749     }
750
751     surface->flags |= SFLAG_INSYSMEM;
752
753     return WINED3D_OK;
754 }
755
756 static void surface_realize_palette(struct wined3d_surface *surface)
757 {
758     struct wined3d_palette *palette = surface->palette;
759
760     TRACE("surface %p.\n", surface);
761
762     if (!palette) return;
763
764     if (surface->resource.format->id == WINED3DFMT_P8_UINT
765             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
766     {
767         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
768         {
769             /* Make sure the texture is up to date. This call doesn't do
770              * anything if the texture is already up to date. */
771             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
772
773             /* We want to force a palette refresh, so mark the drawable as not being up to date */
774             if (!surface_is_offscreen(surface))
775                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
776         }
777         else
778         {
779             if (!(surface->flags & SFLAG_INSYSMEM))
780             {
781                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
782                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
783             }
784             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
785         }
786     }
787
788     if (surface->flags & SFLAG_DIBSECTION)
789     {
790         RGBQUAD col[256];
791         unsigned int i;
792
793         TRACE("Updating the DC's palette.\n");
794
795         for (i = 0; i < 256; ++i)
796         {
797             col[i].rgbRed   = palette->palents[i].peRed;
798             col[i].rgbGreen = palette->palents[i].peGreen;
799             col[i].rgbBlue  = palette->palents[i].peBlue;
800             col[i].rgbReserved = 0;
801         }
802         SetDIBColorTable(surface->hDC, 0, 256, col);
803     }
804
805     /* Propagate the changes to the drawable when we have a palette. */
806     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
807         surface_load_location(surface, SFLAG_INDRAWABLE, NULL);
808 }
809
810 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
811 {
812     HRESULT hr;
813
814     /* If there's no destination surface there is nothing to do. */
815     if (!surface->overlay_dest)
816         return WINED3D_OK;
817
818     /* Blt calls ModifyLocation on the dest surface, which in turn calls
819      * DrawOverlay to update the overlay. Prevent an endless recursion. */
820     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
821         return WINED3D_OK;
822
823     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
824     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
825             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
826     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
827
828     return hr;
829 }
830
831 static void surface_preload(struct wined3d_surface *surface)
832 {
833     TRACE("surface %p.\n", surface);
834
835     surface_internal_preload(surface, SRGB_ANY);
836 }
837
838 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
839 {
840     struct wined3d_device *device = surface->resource.device;
841     const RECT *pass_rect = rect;
842
843     TRACE("surface %p, rect %s, flags %#x.\n",
844             surface, wine_dbgstr_rect(rect), flags);
845
846     if (flags & WINED3DLOCK_DISCARD)
847     {
848         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
849         surface_prepare_system_memory(surface);
850         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
851     }
852     else
853     {
854         /* surface_load_location() does not check if the rectangle specifies
855          * the full surface. Most callers don't need that, so do it here. */
856         if (rect && !rect->top && !rect->left
857                 && rect->right == surface->resource.width
858                 && rect->bottom == surface->resource.height)
859             pass_rect = NULL;
860
861         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
862                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
863                 || surface == device->fb.render_targets[0])))
864             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
865     }
866
867     if (surface->flags & SFLAG_PBO)
868     {
869         const struct wined3d_gl_info *gl_info;
870         struct wined3d_context *context;
871
872         context = context_acquire(device, NULL);
873         gl_info = context->gl_info;
874
875         ENTER_GL();
876         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
877         checkGLcall("glBindBufferARB");
878
879         /* This shouldn't happen but could occur if some other function
880          * didn't handle the PBO properly. */
881         if (surface->resource.allocatedMemory)
882             ERR("The surface already has PBO memory allocated.\n");
883
884         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
885         checkGLcall("glMapBufferARB");
886
887         /* Make sure the PBO isn't set anymore in order not to break non-PBO
888          * calls. */
889         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
890         checkGLcall("glBindBufferARB");
891
892         LEAVE_GL();
893         context_release(context);
894     }
895
896     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
897     {
898         if (!rect)
899             surface_add_dirty_rect(surface, NULL);
900         else
901         {
902             WINED3DBOX b;
903
904             b.Left = rect->left;
905             b.Top = rect->top;
906             b.Right = rect->right;
907             b.Bottom = rect->bottom;
908             b.Front = 0;
909             b.Back = 1;
910             surface_add_dirty_rect(surface, &b);
911         }
912     }
913 }
914
915 static void surface_unmap(struct wined3d_surface *surface)
916 {
917     struct wined3d_device *device = surface->resource.device;
918     BOOL fullsurface;
919
920     TRACE("surface %p.\n", surface);
921
922     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
923
924     if (surface->flags & SFLAG_PBO)
925     {
926         const struct wined3d_gl_info *gl_info;
927         struct wined3d_context *context;
928
929         TRACE("Freeing PBO memory.\n");
930
931         context = context_acquire(device, NULL);
932         gl_info = context->gl_info;
933
934         ENTER_GL();
935         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
936         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
937         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
938         checkGLcall("glUnmapBufferARB");
939         LEAVE_GL();
940         context_release(context);
941
942         surface->resource.allocatedMemory = NULL;
943     }
944
945     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
946
947     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
948     {
949         TRACE("Not dirtified, nothing to do.\n");
950         goto done;
951     }
952
953     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
954             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
955     {
956         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
957         {
958             static BOOL warned = FALSE;
959             if (!warned)
960             {
961                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
962                 warned = TRUE;
963             }
964             goto done;
965         }
966
967         if (!surface->dirtyRect.left && !surface->dirtyRect.top
968                 && surface->dirtyRect.right == surface->resource.width
969                 && surface->dirtyRect.bottom == surface->resource.height)
970         {
971             fullsurface = TRUE;
972         }
973         else
974         {
975             /* TODO: Proper partial rectangle tracking. */
976             fullsurface = FALSE;
977             surface->flags |= SFLAG_INSYSMEM;
978         }
979
980         surface_load_location(surface, SFLAG_INDRAWABLE, fullsurface ? NULL : &surface->dirtyRect);
981
982         /* Partial rectangle tracking is not commonly implemented, it is only
983          * done for render targets. INSYSMEM was set before to tell
984          * surface_load_location() where to read the rectangle from.
985          * Indrawable is set because all modifications from the partial
986          * sysmem copy are written back to the drawable, thus the surface is
987          * merged again in the drawable. The sysmem copy is not fully up to
988          * date because only a subrectangle was read in Map(). */
989         if (!fullsurface)
990         {
991             surface_modify_location(surface, SFLAG_INDRAWABLE, TRUE);
992             surface_evict_sysmem(surface);
993         }
994
995         surface->dirtyRect.left = surface->resource.width;
996         surface->dirtyRect.top = surface->resource.height;
997         surface->dirtyRect.right = 0;
998         surface->dirtyRect.bottom = 0;
999     }
1000     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1001     {
1002         FIXME("Depth / stencil buffer locking is not implemented.\n");
1003     }
1004
1005 done:
1006     /* Overlays have to be redrawn manually after changes with the GL implementation */
1007     if (surface->overlay_dest)
1008         surface->surface_ops->surface_draw_overlay(surface);
1009 }
1010
1011 static HRESULT surface_getdc(struct wined3d_surface *surface)
1012 {
1013     WINED3DLOCKED_RECT lock;
1014     HRESULT hr;
1015
1016     TRACE("surface %p.\n", surface);
1017
1018     /* Create a DIB section if there isn't a dc yet. */
1019     if (!surface->hDC)
1020     {
1021         if (surface->flags & SFLAG_CLIENT)
1022         {
1023             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1024             surface_release_client_storage(surface);
1025         }
1026         hr = surface_create_dib_section(surface);
1027         if (FAILED(hr))
1028             return WINED3DERR_INVALIDCALL;
1029
1030         /* Use the DIB section from now on if we are not using a PBO. */
1031         if (!(surface->flags & SFLAG_PBO))
1032             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1033     }
1034
1035     /* Map the surface. */
1036     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1037     if (FAILED(hr))
1038         ERR("Map failed, hr %#x.\n", hr);
1039
1040     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1041      * activates the allocatedMemory. */
1042     if (surface->flags & SFLAG_PBO)
1043         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->dib.bitmap_size);
1044
1045     return hr;
1046 }
1047
1048 static HRESULT surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1049 {
1050     TRACE("surface %p, override %p.\n", surface, override);
1051
1052     /* Flipping is only supported on render targets and overlays. */
1053     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
1054     {
1055         WARN("Tried to flip a non-render target, non-overlay surface.\n");
1056         return WINEDDERR_NOTFLIPPABLE;
1057     }
1058
1059     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1060     {
1061         flip_surface(surface, override);
1062
1063         /* Update the overlay if it is visible */
1064         if (surface->overlay_dest)
1065             return surface->surface_ops->surface_draw_overlay(surface);
1066         else
1067             return WINED3D_OK;
1068     }
1069
1070     return WINED3D_OK;
1071 }
1072
1073 static BOOL surface_is_full_rect(struct wined3d_surface *surface, const RECT *r)
1074 {
1075     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1076         return FALSE;
1077     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1078         return FALSE;
1079     return TRUE;
1080 }
1081
1082 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1083         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1084 {
1085     const struct wined3d_gl_info *gl_info;
1086     struct wined3d_context *context;
1087     DWORD src_mask, dst_mask;
1088     GLbitfield gl_mask;
1089
1090     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1091             device, src_surface, wine_dbgstr_rect(src_rect),
1092             dst_surface, wine_dbgstr_rect(dst_rect));
1093
1094     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1095     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1096
1097     if (src_mask != dst_mask)
1098     {
1099         ERR("Incompatible formats %s and %s.\n",
1100                 debug_d3dformat(src_surface->resource.format->id),
1101                 debug_d3dformat(dst_surface->resource.format->id));
1102         return;
1103     }
1104
1105     if (!src_mask)
1106     {
1107         ERR("Not a depth / stencil format: %s.\n",
1108                 debug_d3dformat(src_surface->resource.format->id));
1109         return;
1110     }
1111
1112     gl_mask = 0;
1113     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1114         gl_mask |= GL_DEPTH_BUFFER_BIT;
1115     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1116         gl_mask |= GL_STENCIL_BUFFER_BIT;
1117
1118     /* Make sure the locations are up-to-date. Loading the destination
1119      * surface isn't required if the entire surface is overwritten. */
1120     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1121     if (!surface_is_full_rect(dst_surface, dst_rect))
1122         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1123
1124     context = context_acquire(device, NULL);
1125     if (!context->valid)
1126     {
1127         context_release(context);
1128         WARN("Invalid context, skipping blit.\n");
1129         return;
1130     }
1131
1132     gl_info = context->gl_info;
1133
1134     ENTER_GL();
1135
1136     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1137     glReadBuffer(GL_NONE);
1138     checkGLcall("glReadBuffer()");
1139     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1140
1141     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1142     context_set_draw_buffer(context, GL_NONE);
1143     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1144
1145     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1146     {
1147         glDepthMask(GL_TRUE);
1148         device_invalidate_state(device, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1149     }
1150     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1151     {
1152         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1153         {
1154             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1155             device_invalidate_state(device, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1156         }
1157         glStencilMask(~0U);
1158         device_invalidate_state(device, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1159     }
1160
1161     glDisable(GL_SCISSOR_TEST);
1162     device_invalidate_state(device, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1163
1164     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1165             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1166     checkGLcall("glBlitFramebuffer()");
1167
1168     LEAVE_GL();
1169
1170     if (wined3d_settings.strict_draw_ordering)
1171         wglFlush(); /* Flush to ensure ordering across contexts. */
1172
1173     context_release(context);
1174 }
1175
1176 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1177         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1178         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1179 {
1180     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1181         return FALSE;
1182
1183     /* Source and/or destination need to be on the GL side */
1184     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1185         return FALSE;
1186
1187     switch (blit_op)
1188     {
1189         case WINED3D_BLIT_OP_COLOR_BLIT:
1190             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1191                 return FALSE;
1192             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1193                 return FALSE;
1194             break;
1195
1196         case WINED3D_BLIT_OP_DEPTH_BLIT:
1197             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1198                 return FALSE;
1199             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1200                 return FALSE;
1201             break;
1202
1203         default:
1204             return FALSE;
1205     }
1206
1207     if (!(src_format->id == dst_format->id
1208             || (is_identity_fixup(src_format->color_fixup)
1209             && is_identity_fixup(dst_format->color_fixup))))
1210         return FALSE;
1211
1212     return TRUE;
1213 }
1214
1215 static BOOL surface_convert_depth_to_float(struct wined3d_surface *surface, DWORD depth, float *float_depth)
1216 {
1217     const struct wined3d_format *format = surface->resource.format;
1218
1219     switch (format->id)
1220     {
1221         case WINED3DFMT_S1_UINT_D15_UNORM:
1222             *float_depth = depth / (float)0x00007fff;
1223             break;
1224
1225         case WINED3DFMT_D16_UNORM:
1226             *float_depth = depth / (float)0x0000ffff;
1227             break;
1228
1229         case WINED3DFMT_D24_UNORM_S8_UINT:
1230         case WINED3DFMT_X8D24_UNORM:
1231             *float_depth = depth / (float)0x00ffffff;
1232             break;
1233
1234         case WINED3DFMT_D32_UNORM:
1235             *float_depth = depth / (float)0xffffffff;
1236             break;
1237
1238         default:
1239             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1240             return FALSE;
1241     }
1242
1243     return TRUE;
1244 }
1245
1246 /* Do not call while under the GL lock. */
1247 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1248 {
1249     const struct wined3d_resource *resource = &surface->resource;
1250     struct wined3d_device *device = resource->device;
1251     const struct blit_shader *blitter;
1252
1253     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1254             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1255     if (!blitter)
1256     {
1257         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1258         return WINED3DERR_INVALIDCALL;
1259     }
1260
1261     return blitter->depth_fill(device, surface, rect, depth);
1262 }
1263
1264 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1265         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1266 {
1267     struct wined3d_device *device = src_surface->resource.device;
1268
1269     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1270             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1271             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1272         return WINED3DERR_INVALIDCALL;
1273
1274     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1275
1276     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1277             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1278     surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
1279
1280     return WINED3D_OK;
1281 }
1282
1283 /* Do not call while under the GL lock. */
1284 static HRESULT surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1285         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1286         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1287 {
1288     struct wined3d_device *device = dst_surface->resource.device;
1289     DWORD src_ds_flags, dst_ds_flags;
1290
1291     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1292             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1293             flags, fx, debug_d3dtexturefiltertype(filter));
1294     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1295
1296     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1297     {
1298         WARN(" Surface is busy, returning DDERR_SURFACEBUSY\n");
1299         return WINEDDERR_SURFACEBUSY;
1300     }
1301
1302     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1303     if (src_surface)
1304         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1305     else
1306         src_ds_flags = 0;
1307
1308     if (src_ds_flags || dst_ds_flags)
1309     {
1310         if (flags & WINEDDBLT_DEPTHFILL)
1311         {
1312             float depth;
1313             RECT rect;
1314
1315             TRACE("Depth fill.\n");
1316
1317             surface_get_rect(dst_surface, dst_rect_in, &rect);
1318
1319             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1320                 return WINED3DERR_INVALIDCALL;
1321
1322             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &rect, depth)))
1323                 return WINED3D_OK;
1324         }
1325         else
1326         {
1327             RECT src_rect, dst_rect;
1328
1329             /* Accessing depth / stencil surfaces is supposed to fail while in
1330              * a scene, except for fills, which seem to work. */
1331             if (device->inScene)
1332             {
1333                 WARN("Rejecting depth / stencil access while in scene.\n");
1334                 return WINED3DERR_INVALIDCALL;
1335             }
1336
1337             if (src_ds_flags != dst_ds_flags)
1338             {
1339                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1340                 return WINED3DERR_INVALIDCALL;
1341             }
1342
1343             if (src_rect_in && (src_rect_in->top || src_rect_in->left
1344                     || src_rect_in->bottom != src_surface->resource.height
1345                     || src_rect_in->right != src_surface->resource.width))
1346             {
1347                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1348                         wine_dbgstr_rect(src_rect_in));
1349                 return WINED3DERR_INVALIDCALL;
1350             }
1351
1352             if (dst_rect_in && (dst_rect_in->top || dst_rect_in->left
1353                     || dst_rect_in->bottom != dst_surface->resource.height
1354                     || dst_rect_in->right != dst_surface->resource.width))
1355             {
1356                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1357                         wine_dbgstr_rect(src_rect_in));
1358                 return WINED3DERR_INVALIDCALL;
1359             }
1360
1361             if (src_surface->resource.height != dst_surface->resource.height
1362                     || src_surface->resource.width != dst_surface->resource.width)
1363             {
1364                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1365                 return WINED3DERR_INVALIDCALL;
1366             }
1367
1368             surface_get_rect(src_surface, src_rect_in, &src_rect);
1369             surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1370
1371             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1372                 return WINED3D_OK;
1373         }
1374     }
1375
1376     /* Special cases for render targets. */
1377     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1378             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1379     {
1380         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, dst_rect_in,
1381                 src_surface, src_rect_in, flags, fx, filter)))
1382             return WINED3D_OK;
1383     }
1384
1385     /* For the rest call the X11 surface implementation. For render targets
1386      * this should be implemented OpenGL accelerated in BltOverride, other
1387      * blits are rather rare. */
1388     return surface_cpu_blt(dst_surface, dst_rect_in, src_surface, src_rect_in, flags, fx, filter);
1389 }
1390
1391 /* Do not call while under the GL lock. */
1392 static HRESULT surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1393         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1394 {
1395     struct wined3d_device *device = dst_surface->resource.device;
1396
1397     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
1398             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1399
1400     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface->flags & SFLAG_LOCKED))
1401     {
1402         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1403         return WINEDDERR_SURFACEBUSY;
1404     }
1405
1406     if (device->inScene && (dst_surface == device->fb.depth_stencil || src_surface == device->fb.depth_stencil))
1407     {
1408         WARN("Attempt to access the depth / stencil surface while in a scene.\n");
1409         return WINED3DERR_INVALIDCALL;
1410     }
1411
1412     /* Special cases for RenderTargets */
1413     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1414             || (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
1415     {
1416
1417         RECT src_rect, dst_rect;
1418         DWORD flags = 0;
1419
1420         surface_get_rect(src_surface, src_rect_in, &src_rect);
1421
1422         dst_rect.left = dst_x;
1423         dst_rect.top = dst_y;
1424         dst_rect.right = dst_x + src_rect.right - src_rect.left;
1425         dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1426
1427         /* Convert BltFast flags into Blt ones because BltOverride is called
1428          * from Blt as well. */
1429         if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1430             flags |= WINEDDBLT_KEYSRC;
1431         if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1432             flags |= WINEDDBLT_KEYDEST;
1433         if (trans & WINEDDBLTFAST_WAIT)
1434             flags |= WINEDDBLT_WAIT;
1435         if (trans & WINEDDBLTFAST_DONOTWAIT)
1436             flags |= WINEDDBLT_DONOTWAIT;
1437
1438         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface,
1439                 &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT)))
1440             return WINED3D_OK;
1441     }
1442
1443     return surface_cpu_bltfast(dst_surface, dst_x, dst_y, src_surface, src_rect_in, trans);
1444 }
1445
1446 static HRESULT surface_set_mem(struct wined3d_surface *surface, void *mem)
1447 {
1448     TRACE("surface %p, mem %p.\n", surface, mem);
1449
1450     if (mem && mem != surface->resource.allocatedMemory)
1451     {
1452         void *release = NULL;
1453
1454         /* Do I have to copy the old surface content? */
1455         if (surface->flags & SFLAG_DIBSECTION)
1456         {
1457             SelectObject(surface->hDC, surface->dib.holdbitmap);
1458             DeleteDC(surface->hDC);
1459             /* Release the DIB section. */
1460             DeleteObject(surface->dib.DIBsection);
1461             surface->dib.bitmap_data = NULL;
1462             surface->resource.allocatedMemory = NULL;
1463             surface->hDC = NULL;
1464             surface->flags &= ~SFLAG_DIBSECTION;
1465         }
1466         else if (!(surface->flags & SFLAG_USERPTR))
1467         {
1468             release = surface->resource.heapMemory;
1469             surface->resource.heapMemory = NULL;
1470         }
1471         surface->resource.allocatedMemory = mem;
1472         surface->flags |= SFLAG_USERPTR;
1473
1474         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
1475         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1476
1477         /* For client textures OpenGL has to be notified. */
1478         if (surface->flags & SFLAG_CLIENT)
1479             surface_release_client_storage(surface);
1480
1481         /* Now free the old memory if any. */
1482         HeapFree(GetProcessHeap(), 0, release);
1483     }
1484     else if (surface->flags & SFLAG_USERPTR)
1485     {
1486         /* Map and GetDC will re-create the dib section and allocated memory. */
1487         surface->resource.allocatedMemory = NULL;
1488         /* HeapMemory should be NULL already. */
1489         if (surface->resource.heapMemory)
1490             ERR("User pointer surface has heap memory allocated.\n");
1491         surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
1492
1493         if (surface->flags & SFLAG_CLIENT)
1494             surface_release_client_storage(surface);
1495
1496         surface_prepare_system_memory(surface);
1497         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1498     }
1499
1500     return WINED3D_OK;
1501 }
1502
1503 /* Context activation is done by the caller. */
1504 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1505 {
1506     if (!surface->resource.heapMemory)
1507     {
1508         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1509         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1510                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1511     }
1512
1513     ENTER_GL();
1514     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1515     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1516     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1517             surface->resource.size, surface->resource.allocatedMemory));
1518     checkGLcall("glGetBufferSubDataARB");
1519     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1520     checkGLcall("glDeleteBuffersARB");
1521     LEAVE_GL();
1522
1523     surface->pbo = 0;
1524     surface->flags &= ~SFLAG_PBO;
1525 }
1526
1527 /* Do not call while under the GL lock. */
1528 static void surface_unload(struct wined3d_resource *resource)
1529 {
1530     struct wined3d_surface *surface = surface_from_resource(resource);
1531     struct wined3d_renderbuffer_entry *entry, *entry2;
1532     struct wined3d_device *device = resource->device;
1533     const struct wined3d_gl_info *gl_info;
1534     struct wined3d_context *context;
1535
1536     TRACE("surface %p.\n", surface);
1537
1538     if (resource->pool == WINED3DPOOL_DEFAULT)
1539     {
1540         /* Default pool resources are supposed to be destroyed before Reset is called.
1541          * Implicit resources stay however. So this means we have an implicit render target
1542          * or depth stencil. The content may be destroyed, but we still have to tear down
1543          * opengl resources, so we cannot leave early.
1544          *
1545          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1546          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1547          * or the depth stencil into an FBO the texture or render buffer will be removed
1548          * and all flags get lost
1549          */
1550         surface_init_sysmem(surface);
1551     }
1552     else
1553     {
1554         /* Load the surface into system memory */
1555         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1556         surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
1557     }
1558     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1559     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1560     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1561
1562     context = context_acquire(device, NULL);
1563     gl_info = context->gl_info;
1564
1565     /* Destroy PBOs, but load them into real sysmem before */
1566     if (surface->flags & SFLAG_PBO)
1567         surface_remove_pbo(surface, gl_info);
1568
1569     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1570      * all application-created targets the application has to release the surface
1571      * before calling _Reset
1572      */
1573     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1574     {
1575         ENTER_GL();
1576         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1577         LEAVE_GL();
1578         list_remove(&entry->entry);
1579         HeapFree(GetProcessHeap(), 0, entry);
1580     }
1581     list_init(&surface->renderbuffers);
1582     surface->current_renderbuffer = NULL;
1583
1584     /* If we're in a texture, the texture name belongs to the texture.
1585      * Otherwise, destroy it. */
1586     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1587     {
1588         ENTER_GL();
1589         glDeleteTextures(1, &surface->texture_name);
1590         surface->texture_name = 0;
1591         glDeleteTextures(1, &surface->texture_name_srgb);
1592         surface->texture_name_srgb = 0;
1593         LEAVE_GL();
1594     }
1595
1596     context_release(context);
1597
1598     resource_unload(resource);
1599 }
1600
/* Resource-level operations for surfaces. The initializer is positional; its
 * only entry is the unload callback, surface_unload(). */
static const struct wined3d_resource_ops surface_resource_ops =
{
    surface_unload,
};
1605
/* Surface operations table built from the surface_*() implementations
 * (presumably the OpenGL-backed ones - confirm against the users of this
 * table). The initializer is positional, so the entries have to stay in the
 * member order of struct wined3d_surface_ops, which is declared elsewhere. */
static const struct wined3d_surface_ops surface_ops =
{
    surface_private_setup,
    surface_cleanup,
    surface_realize_palette,
    surface_draw_overlay,
    surface_preload,
    surface_map,
    surface_unmap,
    surface_getdc,
    surface_flip,
    surface_blt,
    surface_bltfast,
    surface_set_mem,
};
1621
1622 /*****************************************************************************
1623  * Initializes the GDI surface, aka creates the DIB section we render to
1624  * The DIB section creation is done by calling GetDC, which will create the
1625  * section and releasing the dc to allow the app to use it. The dib section
1626  * will stay until the surface is released
1627  *
1628  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1629  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1630  * avoid confusion in the shared surface code.
1631  *
1632  * Returns:
1633  *  WINED3D_OK on success
1634  *  The return values of called methods on failure
1635  *
1636  *****************************************************************************/
1637 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1638 {
1639     HRESULT hr;
1640
1641     TRACE("surface %p.\n", surface);
1642
1643     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1644     {
1645         ERR("Overlays not yet supported by GDI surfaces.\n");
1646         return WINED3DERR_INVALIDCALL;
1647     }
1648
1649     /* Sysmem textures have memory already allocated - release it,
1650      * this avoids an unnecessary memcpy. */
1651     hr = surface_create_dib_section(surface);
1652     if (SUCCEEDED(hr))
1653     {
1654         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1655         surface->resource.heapMemory = NULL;
1656         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1657     }
1658
1659     /* We don't mind the nonpow2 stuff in GDI. */
1660     surface->pow2Width = surface->resource.width;
1661     surface->pow2Height = surface->resource.height;
1662
1663     return WINED3D_OK;
1664 }
1665
1666 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1667 {
1668     TRACE("surface %p.\n", surface);
1669
1670     if (surface->flags & SFLAG_DIBSECTION)
1671     {
1672         /* Release the DC. */
1673         SelectObject(surface->hDC, surface->dib.holdbitmap);
1674         DeleteDC(surface->hDC);
1675         /* Release the DIB section. */
1676         DeleteObject(surface->dib.DIBsection);
1677         surface->dib.bitmap_data = NULL;
1678         surface->resource.allocatedMemory = NULL;
1679     }
1680
1681     if (surface->flags & SFLAG_USERPTR)
1682         wined3d_surface_set_mem(surface, NULL);
1683     if (surface->overlay_dest)
1684         list_remove(&surface->overlay_entry);
1685
1686     HeapFree(GetProcessHeap(), 0, surface->palette9);
1687
1688     resource_cleanup(&surface->resource);
1689 }
1690
1691 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1692 {
1693     struct wined3d_palette *palette = surface->palette;
1694
1695     TRACE("surface %p.\n", surface);
1696
1697     if (!palette) return;
1698
1699     if (surface->flags & SFLAG_DIBSECTION)
1700     {
1701         RGBQUAD col[256];
1702         unsigned int i;
1703
1704         TRACE("Updating the DC's palette.\n");
1705
1706         for (i = 0; i < 256; ++i)
1707         {
1708             col[i].rgbRed = palette->palents[i].peRed;
1709             col[i].rgbGreen = palette->palents[i].peGreen;
1710             col[i].rgbBlue = palette->palents[i].peBlue;
1711             col[i].rgbReserved = 0;
1712         }
1713         SetDIBColorTable(surface->hDC, 0, 256, col);
1714     }
1715
1716     /* Update the image because of the palette change. Some games like e.g.
1717      * Red Alert call SetEntries a lot to implement fading. */
1718     /* Tell the swapchain to update the screen. */
1719     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1720     {
1721         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1722         if (surface == swapchain->front_buffer)
1723         {
1724             x11_copy_to_screen(swapchain, NULL);
1725         }
1726     }
1727 }
1728
1729 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
1730 {
1731     FIXME("GDI surfaces can't draw overlays yet.\n");
1732     return E_FAIL;
1733 }
1734
/* Preload entry for GDI surfaces. Nothing is loaded; the call is only traced
 * and reported as an error. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);

    ERR("Preloading GDI surfaces is not supported.\n");
}
1741
1742 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1743 {
1744     TRACE("surface %p, rect %s, flags %#x.\n",
1745             surface, wine_dbgstr_rect(rect), flags);
1746
1747     if (!surface->resource.allocatedMemory)
1748     {
1749         /* This happens on gdi surfaces if the application set a user pointer
1750          * and resets it. Recreate the DIB section. */
1751         surface_create_dib_section(surface);
1752         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1753     }
1754 }
1755
1756 static void gdi_surface_unmap(struct wined3d_surface *surface)
1757 {
1758     TRACE("surface %p.\n", surface);
1759
1760     /* Tell the swapchain to update the screen. */
1761     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1762     {
1763         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1764         if (surface == swapchain->front_buffer)
1765         {
1766             x11_copy_to_screen(swapchain, &surface->lockedRect);
1767         }
1768     }
1769
1770     memset(&surface->lockedRect, 0, sizeof(RECT));
1771 }
1772
1773 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
1774 {
1775     WINED3DLOCKED_RECT lock;
1776     HRESULT hr;
1777
1778     TRACE("surface %p.\n", surface);
1779
1780     /* Should have a DIB section already. */
1781     if (!(surface->flags & SFLAG_DIBSECTION))
1782     {
1783         WARN("DC not supported on this surface\n");
1784         return WINED3DERR_INVALIDCALL;
1785     }
1786
1787     /* Map the surface. */
1788     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1789     if (FAILED(hr))
1790         ERR("Map failed, hr %#x.\n", hr);
1791
1792     return hr;
1793 }
1794
1795 static HRESULT gdi_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1796 {
1797     TRACE("surface %p, override %p.\n", surface, override);
1798
1799     return WINED3D_OK;
1800 }
1801
1802 static HRESULT gdi_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
1803         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
1804         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1805 {
1806     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1807             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
1808             flags, fx, debug_d3dtexturefiltertype(filter));
1809
1810     return surface_cpu_blt(dst_surface, dst_rect, src_surface, src_rect, flags, fx, filter);
1811 }
1812
1813 static HRESULT gdi_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1814         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
1815 {
1816     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
1817             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
1818
1819     return surface_cpu_bltfast(dst_surface, dst_x, dst_y, src_surface, src_rect, trans);
1820 }
1821
1822 static HRESULT gdi_surface_set_mem(struct wined3d_surface *surface, void *mem)
1823 {
1824     TRACE("surface %p, mem %p.\n", surface, mem);
1825
1826     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
1827     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1828     {
1829         ERR("Not supported on render targets.\n");
1830         return WINED3DERR_INVALIDCALL;
1831     }
1832
1833     if (mem && mem != surface->resource.allocatedMemory)
1834     {
1835         void *release = NULL;
1836
1837         /* Do I have to copy the old surface content? */
1838         if (surface->flags & SFLAG_DIBSECTION)
1839         {
1840             SelectObject(surface->hDC, surface->dib.holdbitmap);
1841             DeleteDC(surface->hDC);
1842             /* Release the DIB section. */
1843             DeleteObject(surface->dib.DIBsection);
1844             surface->dib.bitmap_data = NULL;
1845             surface->resource.allocatedMemory = NULL;
1846             surface->hDC = NULL;
1847             surface->flags &= ~SFLAG_DIBSECTION;
1848         }
1849         else if (!(surface->flags & SFLAG_USERPTR))
1850         {
1851             release = surface->resource.allocatedMemory;
1852         }
1853         surface->resource.allocatedMemory = mem;
1854         surface->flags |= SFLAG_USERPTR | SFLAG_INSYSMEM;
1855
1856         /* Now free the old memory, if any. */
1857         HeapFree(GetProcessHeap(), 0, release);
1858     }
1859     else if (surface->flags & SFLAG_USERPTR)
1860     {
1861         /* Map() and GetDC() will re-create the dib section and allocated memory. */
1862         surface->resource.allocatedMemory = NULL;
1863         surface->flags &= ~SFLAG_USERPTR;
1864     }
1865
1866     return WINED3D_OK;
1867 }
1868
/* Surface operations table for GDI surfaces. The initializer is positional,
 * so the entries have to stay in the member order of
 * struct wined3d_surface_ops, which is declared elsewhere. */
static const struct wined3d_surface_ops gdi_surface_ops =
{
    gdi_surface_private_setup,
    surface_gdi_cleanup,
    gdi_surface_realize_palette,
    gdi_surface_draw_overlay,
    gdi_surface_preload,
    gdi_surface_map,
    gdi_surface_unmap,
    gdi_surface_getdc,
    gdi_surface_flip,
    gdi_surface_blt,
    gdi_surface_bltfast,
    gdi_surface_set_mem,
};
1884
1885 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
1886 {
1887     GLuint *name;
1888     DWORD flag;
1889
1890     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
1891
1892     if(srgb)
1893     {
1894         name = &surface->texture_name_srgb;
1895         flag = SFLAG_INSRGBTEX;
1896     }
1897     else
1898     {
1899         name = &surface->texture_name;
1900         flag = SFLAG_INTEXTURE;
1901     }
1902
1903     if (!*name && new_name)
1904     {
1905         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
1906          * surface has no texture name yet. See if we can get rid of this. */
1907         if (surface->flags & flag)
1908             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
1909         surface_modify_location(surface, flag, FALSE);
1910     }
1911
1912     *name = new_name;
1913     surface_force_reload(surface);
1914 }
1915
1916 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
1917 {
1918     TRACE("surface %p, target %#x.\n", surface, target);
1919
1920     if (surface->texture_target != target)
1921     {
1922         if (target == GL_TEXTURE_RECTANGLE_ARB)
1923         {
1924             surface->flags &= ~SFLAG_NORMCOORD;
1925         }
1926         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
1927         {
1928             surface->flags |= SFLAG_NORMCOORD;
1929         }
1930     }
1931     surface->texture_target = target;
1932     surface_force_reload(surface);
1933 }
1934
1935 /* Context activation is done by the caller. */
1936 void surface_bind(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
1937 {
1938     TRACE("surface %p, gl_info %p, srgb %#x.\n", surface, gl_info, srgb);
1939
1940     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
1941     {
1942         struct wined3d_texture *texture = surface->container.u.texture;
1943
1944         TRACE("Passing to container (%p).\n", texture);
1945         texture->texture_ops->texture_bind(texture, gl_info, srgb);
1946     }
1947     else
1948     {
1949         if (surface->texture_level)
1950         {
1951             ERR("Standalone surface %p is non-zero texture level %u.\n",
1952                     surface, surface->texture_level);
1953         }
1954
1955         if (srgb)
1956             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
1957
1958         ENTER_GL();
1959
1960         if (!surface->texture_name)
1961         {
1962             glGenTextures(1, &surface->texture_name);
1963             checkGLcall("glGenTextures");
1964
1965             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
1966
1967             glBindTexture(surface->texture_target, surface->texture_name);
1968             checkGLcall("glBindTexture");
1969             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
1970             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
1971             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
1972             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1973             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1974             checkGLcall("glTexParameteri");
1975         }
1976         else
1977         {
1978             glBindTexture(surface->texture_target, surface->texture_name);
1979             checkGLcall("glBindTexture");
1980         }
1981
1982         LEAVE_GL();
1983     }
1984 }
1985
1986 /* This function checks if the primary render target uses the 8bit paletted format. */
1987 static BOOL primary_render_target_is_p8(struct wined3d_device *device)
1988 {
1989     if (device->fb.render_targets && device->fb.render_targets[0])
1990     {
1991         struct wined3d_surface *render_target = device->fb.render_targets[0];
1992         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1993                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1994             return TRUE;
1995     }
1996     return FALSE;
1997 }
1998
1999 /* This call just downloads data, the caller is responsible for binding the
2000  * correct texture. */
2001 /* Context activation is done by the caller. */
2002 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2003 {
2004     const struct wined3d_format *format = surface->resource.format;
2005
2006     /* Only support read back of converted P8 surfaces. */
2007     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2008     {
2009         FIXME("Readback conversion not supported for format %s.\n", debug_d3dformat(format->id));
2010         return;
2011     }
2012
2013     ENTER_GL();
2014
2015     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2016     {
2017         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2018                 surface, surface->texture_level, format->glFormat, format->glType,
2019                 surface->resource.allocatedMemory);
2020
2021         if (surface->flags & SFLAG_PBO)
2022         {
2023             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2024             checkGLcall("glBindBufferARB");
2025             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2026             checkGLcall("glGetCompressedTexImageARB");
2027             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2028             checkGLcall("glBindBufferARB");
2029         }
2030         else
2031         {
2032             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2033                     surface->texture_level, surface->resource.allocatedMemory));
2034             checkGLcall("glGetCompressedTexImageARB");
2035         }
2036
2037         LEAVE_GL();
2038     }
2039     else
2040     {
2041         void *mem;
2042         GLenum gl_format = format->glFormat;
2043         GLenum gl_type = format->glType;
2044         int src_pitch = 0;
2045         int dst_pitch = 0;
2046
2047         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2048         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2049         {
2050             gl_format = GL_ALPHA;
2051             gl_type = GL_UNSIGNED_BYTE;
2052         }
2053
2054         if (surface->flags & SFLAG_NONPOW2)
2055         {
2056             unsigned char alignment = surface->resource.device->surface_alignment;
2057             src_pitch = format->byte_count * surface->pow2Width;
2058             dst_pitch = wined3d_surface_get_pitch(surface);
2059             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2060             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2061         }
2062         else
2063         {
2064             mem = surface->resource.allocatedMemory;
2065         }
2066
2067         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2068                 surface, surface->texture_level, gl_format, gl_type, mem);
2069
2070         if (surface->flags & SFLAG_PBO)
2071         {
2072             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2073             checkGLcall("glBindBufferARB");
2074
2075             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2076             checkGLcall("glGetTexImage");
2077
2078             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2079             checkGLcall("glBindBufferARB");
2080         }
2081         else
2082         {
2083             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2084             checkGLcall("glGetTexImage");
2085         }
2086         LEAVE_GL();
2087
2088         if (surface->flags & SFLAG_NONPOW2)
2089         {
2090             const BYTE *src_data;
2091             BYTE *dst_data;
2092             UINT y;
2093             /*
2094              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2095              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2096              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2097              *
2098              * We're doing this...
2099              *
2100              * instead of boxing the texture :
2101              * |<-texture width ->|  -->pow2width|   /\
2102              * |111111111111111111|              |   |
2103              * |222 Texture 222222| boxed empty  | texture height
2104              * |3333 Data 33333333|              |   |
2105              * |444444444444444444|              |   \/
2106              * -----------------------------------   |
2107              * |     boxed  empty | boxed empty  | pow2height
2108              * |                  |              |   \/
2109              * -----------------------------------
2110              *
2111              *
2112              * we're repacking the data to the expected texture width
2113              *
2114              * |<-texture width ->|  -->pow2width|   /\
2115              * |111111111111111111222222222222222|   |
2116              * |222333333333333333333444444444444| texture height
2117              * |444444                           |   |
2118              * |                                 |   \/
2119              * |                                 |   |
2120              * |            empty                | pow2height
2121              * |                                 |   \/
2122              * -----------------------------------
2123              *
2124              * == is the same as
2125              *
2126              * |<-texture width ->|    /\
2127              * |111111111111111111|
2128              * |222222222222222222|texture height
2129              * |333333333333333333|
2130              * |444444444444444444|    \/
2131              * --------------------
2132              *
2133              * this also means that any references to allocatedMemory should work with the data as if were a
2134              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2135              *
2136              * internally the texture is still stored in a boxed format so any references to textureName will
2137              * get a boxed texture with width pow2width and not a texture of width resource.width.
2138              *
2139              * Performance should not be an issue, because applications normally do not lock the surfaces when
2140              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2141              * and doesn't have to be re-read. */
2142             src_data = mem;
2143             dst_data = surface->resource.allocatedMemory;
2144             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2145             for (y = 1; y < surface->resource.height; ++y)
2146             {
2147                 /* skip the first row */
2148                 src_data += src_pitch;
2149                 dst_data += dst_pitch;
2150                 memcpy(dst_data, src_data, dst_pitch);
2151             }
2152
2153             HeapFree(GetProcessHeap(), 0, mem);
2154         }
2155     }
2156
2157     /* Surface has now been downloaded */
2158     surface->flags |= SFLAG_INSYSMEM;
2159 }
2160
2161 /* This call just uploads data, the caller is responsible for binding the
2162  * correct texture. */
2163 /* Context activation is done by the caller. */
2164 void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2165         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2166         BOOL srgb, const struct wined3d_bo_address *data)
2167 {
2168     UINT update_w = src_rect->right - src_rect->left;
2169     UINT update_h = src_rect->bottom - src_rect->top;
2170
2171     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2172             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2173             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2174
2175     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2176         update_h *= format->heightscale;
2177
2178     ENTER_GL();
2179
2180     if (data->buffer_object)
2181     {
2182         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2183         checkGLcall("glBindBufferARB");
2184     }
2185
2186     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2187     {
2188         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2189         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2190         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2191         const BYTE *addr = data->addr;
2192         GLenum internal;
2193
2194         addr += (src_rect->top / format->block_height) * src_pitch;
2195         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2196
2197         if (srgb)
2198             internal = format->glGammaInternal;
2199         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2200             internal = format->rtInternal;
2201         else
2202             internal = format->glInternal;
2203
2204         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2205                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2206                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2207
2208         if (row_length == src_pitch)
2209         {
2210             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2211                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2212         }
2213         else
2214         {
2215             UINT row, y;
2216
2217             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2218              * can't use the unpack row length like below. */
2219             for (row = 0, y = dst_point->y; row < row_count; ++row)
2220             {
2221                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2222                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2223                 y += format->block_height;
2224                 addr += src_pitch;
2225             }
2226         }
2227         checkGLcall("glCompressedTexSubImage2DARB");
2228     }
2229     else
2230     {
2231         const BYTE *addr = data->addr;
2232
2233         addr += src_rect->top * src_w * format->byte_count;
2234         addr += src_rect->left * format->byte_count;
2235
2236         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2237                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2238                 update_w, update_h, format->glFormat, format->glType, addr);
2239
2240         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2241         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2242                 update_w, update_h, format->glFormat, format->glType, addr);
2243         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2244         checkGLcall("glTexSubImage2D");
2245     }
2246
2247     if (data->buffer_object)
2248     {
2249         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2250         checkGLcall("glBindBufferARB");
2251     }
2252
2253     LEAVE_GL();
2254
2255     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2256     {
2257         struct wined3d_device *device = surface->resource.device;
2258         unsigned int i;
2259
2260         for (i = 0; i < device->context_count; ++i)
2261         {
2262             context_surface_update(device->contexts[i], surface);
2263         }
2264     }
2265 }
2266
2267 /* This call just allocates the texture, the caller is responsible for binding
2268  * the correct texture. */
2269 /* Context activation is done by the caller. */
2270 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2271         const struct wined3d_format *format, BOOL srgb)
2272 {
2273     BOOL enable_client_storage = FALSE;
2274     GLsizei width = surface->pow2Width;
2275     GLsizei height = surface->pow2Height;
2276     const BYTE *mem = NULL;
2277     GLenum internal;
2278
2279     if (srgb)
2280     {
2281         internal = format->glGammaInternal;
2282     }
2283     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2284     {
2285         internal = format->rtInternal;
2286     }
2287     else
2288     {
2289         internal = format->glInternal;
2290     }
2291
2292     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2293
2294     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2295             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2296             internal, width, height, format->glFormat, format->glType);
2297
2298     ENTER_GL();
2299
2300     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2301     {
2302         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2303                 || !surface->resource.allocatedMemory)
2304         {
2305             /* In some cases we want to disable client storage.
2306              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2307              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2308              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2309              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2310              */
2311             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2312             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2313             surface->flags &= ~SFLAG_CLIENT;
2314             enable_client_storage = TRUE;
2315         }
2316         else
2317         {
2318             surface->flags |= SFLAG_CLIENT;
2319
2320             /* Point OpenGL to our allocated texture memory. Do not use
2321              * resource.allocatedMemory here because it might point into a
2322              * PBO. Instead use heapMemory, but get the alignment right. */
2323             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2324                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2325         }
2326     }
2327
2328     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2329     {
2330         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2331                 internal, width, height, 0, surface->resource.size, mem));
2332         checkGLcall("glCompressedTexImage2DARB");
2333     }
2334     else
2335     {
2336         glTexImage2D(surface->texture_target, surface->texture_level,
2337                 internal, width, height, 0, format->glFormat, format->glType, mem);
2338         checkGLcall("glTexImage2D");
2339     }
2340
2341     if(enable_client_storage) {
2342         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2343         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2344     }
2345     LEAVE_GL();
2346 }
2347
2348 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2349  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2350 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2351 /* GL locking is done by the caller */
2352 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, struct wined3d_surface *rt)
2353 {
2354     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2355     struct wined3d_renderbuffer_entry *entry;
2356     GLuint renderbuffer = 0;
2357     unsigned int src_width, src_height;
2358     unsigned int width, height;
2359
2360     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2361     {
2362         width = rt->pow2Width;
2363         height = rt->pow2Height;
2364     }
2365     else
2366     {
2367         width = surface->pow2Width;
2368         height = surface->pow2Height;
2369     }
2370
2371     src_width = surface->pow2Width;
2372     src_height = surface->pow2Height;
2373
2374     /* A depth stencil smaller than the render target is not valid */
2375     if (width > src_width || height > src_height) return;
2376
2377     /* Remove any renderbuffer set if the sizes match */
2378     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2379             || (width == src_width && height == src_height))
2380     {
2381         surface->current_renderbuffer = NULL;
2382         return;
2383     }
2384
2385     /* Look if we've already got a renderbuffer of the correct dimensions */
2386     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2387     {
2388         if (entry->width == width && entry->height == height)
2389         {
2390             renderbuffer = entry->id;
2391             surface->current_renderbuffer = entry;
2392             break;
2393         }
2394     }
2395
2396     if (!renderbuffer)
2397     {
2398         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2399         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2400         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2401                 surface->resource.format->glInternal, width, height);
2402
2403         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2404         entry->width = width;
2405         entry->height = height;
2406         entry->id = renderbuffer;
2407         list_add_head(&surface->renderbuffers, &entry->entry);
2408
2409         surface->current_renderbuffer = entry;
2410     }
2411
2412     checkGLcall("set_compatible_renderbuffer");
2413 }
2414
2415 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2416 {
2417     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2418
2419     TRACE("surface %p.\n", surface);
2420
2421     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2422     {
2423         ERR("Surface %p is not on a swapchain.\n", surface);
2424         return GL_NONE;
2425     }
2426
2427     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2428     {
2429         if (swapchain->render_to_fbo)
2430         {
2431             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2432             return GL_COLOR_ATTACHMENT0;
2433         }
2434         TRACE("Returning GL_BACK\n");
2435         return GL_BACK;
2436     }
2437     else if (surface == swapchain->front_buffer)
2438     {
2439         TRACE("Returning GL_FRONT\n");
2440         return GL_FRONT;
2441     }
2442
2443     FIXME("Higher back buffer, returning GL_BACK\n");
2444     return GL_BACK;
2445 }
2446
2447 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2448 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2449 {
2450     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2451
2452     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2453         /* No partial locking for textures yet. */
2454         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2455
2456     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2457     if (dirty_rect)
2458     {
2459         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2460         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2461         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2462         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2463     }
2464     else
2465     {
2466         surface->dirtyRect.left = 0;
2467         surface->dirtyRect.top = 0;
2468         surface->dirtyRect.right = surface->resource.width;
2469         surface->dirtyRect.bottom = surface->resource.height;
2470     }
2471
2472     /* if the container is a texture then mark it dirty. */
2473     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2474     {
2475         TRACE("Passing to container.\n");
2476         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2477     }
2478 }
2479
2480 static BOOL surface_convert_color_to_float(struct wined3d_surface *surface,
2481         DWORD color, WINED3DCOLORVALUE *float_color)
2482 {
2483     const struct wined3d_format *format = surface->resource.format;
2484     struct wined3d_device *device = surface->resource.device;
2485
2486     switch (format->id)
2487     {
2488         case WINED3DFMT_P8_UINT:
2489             if (surface->palette)
2490             {
2491                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
2492                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
2493                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
2494             }
2495             else
2496             {
2497                 float_color->r = 0.0f;
2498                 float_color->g = 0.0f;
2499                 float_color->b = 0.0f;
2500             }
2501             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
2502             break;
2503
2504         case WINED3DFMT_B5G6R5_UNORM:
2505             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
2506             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
2507             float_color->b = (color & 0x1f) / 31.0f;
2508             float_color->a = 1.0f;
2509             break;
2510
2511         case WINED3DFMT_B8G8R8_UNORM:
2512         case WINED3DFMT_B8G8R8X8_UNORM:
2513             float_color->r = D3DCOLOR_R(color);
2514             float_color->g = D3DCOLOR_G(color);
2515             float_color->b = D3DCOLOR_B(color);
2516             float_color->a = 1.0f;
2517             break;
2518
2519         case WINED3DFMT_B8G8R8A8_UNORM:
2520             float_color->r = D3DCOLOR_R(color);
2521             float_color->g = D3DCOLOR_G(color);
2522             float_color->b = D3DCOLOR_B(color);
2523             float_color->a = D3DCOLOR_A(color);
2524             break;
2525
2526         default:
2527             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
2528             return FALSE;
2529     }
2530
2531     return TRUE;
2532 }
2533
2534 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2535 {
2536     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2537
2538     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2539
2540     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2541     {
2542         ERR("Not supported on scratch surfaces.\n");
2543         return WINED3DERR_INVALIDCALL;
2544     }
2545
2546     if (!(surface->flags & flag))
2547     {
2548         TRACE("Reloading because surface is dirty\n");
2549     }
2550     /* Reload if either the texture and sysmem have different ideas about the
2551      * color key, or the actual key values changed. */
2552     else if (!(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2553             || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2554             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2555             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2556     {
2557         TRACE("Reloading because of color keying\n");
2558         /* To perform the color key conversion we need a sysmem copy of
2559          * the surface. Make sure we have it. */
2560
2561         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2562         /* Make sure the texture is reloaded because of the color key change,
2563          * this kills performance though :( */
2564         /* TODO: This is not necessarily needed with hw palettized texture support. */
2565         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2566     }
2567     else
2568     {
2569         TRACE("surface is already in texture\n");
2570         return WINED3D_OK;
2571     }
2572
2573     /* No partial locking for textures yet. */
2574     surface_load_location(surface, flag, NULL);
2575     surface_evict_sysmem(surface);
2576
2577     return WINED3D_OK;
2578 }
2579
/* Convert a 32 bit float to the bit pattern of a 16 bit (half precision)
 * float: 1 sign bit, 5 exponent bits with a bias of 15, 10 mantissa bits.
 *
 * Zero (of either sign) is returned as 0x0000, NaN as 0x7c01 and infinities
 * as +/-INF. Values too large for a half become INF, values too small become
 * denormals or zero. Normal values are rounded to nearest, ties away from
 * zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp;
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the absolute value into [2^10, 2^11), so that its integer part
     * is the 11 bit mantissa including the implicit leading one. */
    tmp = *in < 0.0f ? -*in : *in;
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 mantissa bits (2047.5 -> 2048).
     * Renormalize, otherwise the mantissa would be masked to 0 below while
     * the exponent stays one too small, halving the result. */
    if (mantissa == 0x800)
    {
        mantissa = 0x400;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2642
2643 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2644 {
2645     ULONG refcount;
2646
2647     TRACE("Surface %p, container %p of type %#x.\n",
2648             surface, surface->container.u.base, surface->container.type);
2649
2650     switch (surface->container.type)
2651     {
2652         case WINED3D_CONTAINER_TEXTURE:
2653             return wined3d_texture_incref(surface->container.u.texture);
2654
2655         case WINED3D_CONTAINER_SWAPCHAIN:
2656             return wined3d_swapchain_incref(surface->container.u.swapchain);
2657
2658         default:
2659             ERR("Unhandled container type %#x.\n", surface->container.type);
2660         case WINED3D_CONTAINER_NONE:
2661             break;
2662     }
2663
2664     refcount = InterlockedIncrement(&surface->resource.ref);
2665     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2666
2667     return refcount;
2668 }
2669
2670 /* Do not call while under the GL lock. */
2671 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2672 {
2673     ULONG refcount;
2674
2675     TRACE("Surface %p, container %p of type %#x.\n",
2676             surface, surface->container.u.base, surface->container.type);
2677
2678     switch (surface->container.type)
2679     {
2680         case WINED3D_CONTAINER_TEXTURE:
2681             return wined3d_texture_decref(surface->container.u.texture);
2682
2683         case WINED3D_CONTAINER_SWAPCHAIN:
2684             return wined3d_swapchain_decref(surface->container.u.swapchain);
2685
2686         default:
2687             ERR("Unhandled container type %#x.\n", surface->container.type);
2688         case WINED3D_CONTAINER_NONE:
2689             break;
2690     }
2691
2692     refcount = InterlockedDecrement(&surface->resource.ref);
2693     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2694
2695     if (!refcount)
2696     {
2697         surface->surface_ops->surface_cleanup(surface);
2698         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2699
2700         TRACE("Destroyed surface %p.\n", surface);
2701         HeapFree(GetProcessHeap(), 0, surface);
2702     }
2703
2704     return refcount;
2705 }
2706
2707 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2708 {
2709     return resource_set_priority(&surface->resource, priority);
2710 }
2711
2712 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2713 {
2714     return resource_get_priority(&surface->resource);
2715 }
2716
2717 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2718 {
2719     TRACE("surface %p.\n", surface);
2720
2721     surface->surface_ops->surface_preload(surface);
2722 }
2723
2724 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2725 {
2726     TRACE("surface %p.\n", surface);
2727
2728     return surface->resource.parent;
2729 }
2730
2731 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2732 {
2733     TRACE("surface %p.\n", surface);
2734
2735     return &surface->resource;
2736 }
2737
2738 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2739 {
2740     TRACE("surface %p, flags %#x.\n", surface, flags);
2741
2742     switch (flags)
2743     {
2744         case WINEDDGBS_CANBLT:
2745         case WINEDDGBS_ISBLTDONE:
2746             return WINED3D_OK;
2747
2748         default:
2749             return WINED3DERR_INVALIDCALL;
2750     }
2751 }
2752
2753 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2754 {
2755     TRACE("surface %p, flags %#x.\n", surface, flags);
2756
2757     /* XXX: DDERR_INVALIDSURFACETYPE */
2758
2759     switch (flags)
2760     {
2761         case WINEDDGFS_CANFLIP:
2762         case WINEDDGFS_ISFLIPDONE:
2763             return WINED3D_OK;
2764
2765         default:
2766             return WINED3DERR_INVALIDCALL;
2767     }
2768 }
2769
2770 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2771 {
2772     TRACE("surface %p.\n", surface);
2773
2774     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2775     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2776 }
2777
2778 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2779 {
2780     TRACE("surface %p.\n", surface);
2781
2782     /* So far we don't lose anything :) */
2783     surface->flags &= ~SFLAG_LOST;
2784     return WINED3D_OK;
2785 }
2786
2787 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2788 {
2789     TRACE("surface %p, palette %p.\n", surface, palette);
2790
2791     if (surface->palette == palette)
2792     {
2793         TRACE("Nop palette change.\n");
2794         return WINED3D_OK;
2795     }
2796
2797     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2798         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2799
2800     surface->palette = palette;
2801
2802     if (palette)
2803     {
2804         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2805             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2806
2807         surface->surface_ops->surface_realize_palette(surface);
2808     }
2809
2810     return WINED3D_OK;
2811 }
2812
2813 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2814         DWORD flags, const WINEDDCOLORKEY *color_key)
2815 {
2816     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
2817
2818     if (flags & WINEDDCKEY_COLORSPACE)
2819     {
2820         FIXME(" colorkey value not supported (%08x) !\n", flags);
2821         return WINED3DERR_INVALIDCALL;
2822     }
2823
2824     /* Dirtify the surface, but only if a key was changed. */
2825     if (color_key)
2826     {
2827         switch (flags & ~WINEDDCKEY_COLORSPACE)
2828         {
2829             case WINEDDCKEY_DESTBLT:
2830                 surface->DestBltCKey = *color_key;
2831                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
2832                 break;
2833
2834             case WINEDDCKEY_DESTOVERLAY:
2835                 surface->DestOverlayCKey = *color_key;
2836                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
2837                 break;
2838
2839             case WINEDDCKEY_SRCOVERLAY:
2840                 surface->SrcOverlayCKey = *color_key;
2841                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
2842                 break;
2843
2844             case WINEDDCKEY_SRCBLT:
2845                 surface->SrcBltCKey = *color_key;
2846                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
2847                 break;
2848         }
2849     }
2850     else
2851     {
2852         switch (flags & ~WINEDDCKEY_COLORSPACE)
2853         {
2854             case WINEDDCKEY_DESTBLT:
2855                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
2856                 break;
2857
2858             case WINEDDCKEY_DESTOVERLAY:
2859                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
2860                 break;
2861
2862             case WINEDDCKEY_SRCOVERLAY:
2863                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
2864                 break;
2865
2866             case WINEDDCKEY_SRCBLT:
2867                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
2868                 break;
2869         }
2870     }
2871
2872     return WINED3D_OK;
2873 }
2874
2875 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
2876 {
2877     TRACE("surface %p.\n", surface);
2878
2879     return surface->palette;
2880 }
2881
2882 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
2883 {
2884     const struct wined3d_format *format = surface->resource.format;
2885     DWORD pitch;
2886
2887     TRACE("surface %p.\n", surface);
2888
2889     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
2890     {
2891         /* Since compressed formats are block based, pitch means the amount of
2892          * bytes to the next row of block rather than the next row of pixels. */
2893         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
2894         pitch = row_block_count * format->block_byte_count;
2895     }
2896     else
2897     {
2898         unsigned char alignment = surface->resource.device->surface_alignment;
2899         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
2900         pitch = (pitch + alignment - 1) & ~(alignment - 1);
2901     }
2902
2903     TRACE("Returning %u.\n", pitch);
2904
2905     return pitch;
2906 }
2907
2908 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
2909 {
2910     TRACE("surface %p, mem %p.\n", surface, mem);
2911
2912     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
2913     {
2914         WARN("Surface is locked or the DC is in use.\n");
2915         return WINED3DERR_INVALIDCALL;
2916     }
2917
2918     return surface->surface_ops->surface_set_mem(surface, mem);
2919 }
2920
2921 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
2922 {
2923     LONG w, h;
2924
2925     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
2926
2927     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2928     {
2929         WARN("Not an overlay surface.\n");
2930         return WINEDDERR_NOTAOVERLAYSURFACE;
2931     }
2932
2933     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
2934     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
2935     surface->overlay_destrect.left = x;
2936     surface->overlay_destrect.top = y;
2937     surface->overlay_destrect.right = x + w;
2938     surface->overlay_destrect.bottom = y + h;
2939
2940     surface->surface_ops->surface_draw_overlay(surface);
2941
2942     return WINED3D_OK;
2943 }
2944
2945 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
2946 {
2947     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
2948
2949     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2950     {
2951         TRACE("Not an overlay surface.\n");
2952         return WINEDDERR_NOTAOVERLAYSURFACE;
2953     }
2954
2955     if (!surface->overlay_dest)
2956     {
2957         TRACE("Overlay not visible.\n");
2958         *x = 0;
2959         *y = 0;
2960         return WINEDDERR_OVERLAYNOTVISIBLE;
2961     }
2962
2963     *x = surface->overlay_destrect.left;
2964     *y = surface->overlay_destrect.top;
2965
2966     TRACE("Returning position %d, %d.\n", *x, *y);
2967
2968     return WINED3D_OK;
2969 }
2970
2971 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
2972         DWORD flags, struct wined3d_surface *ref)
2973 {
2974     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
2975
2976     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2977     {
2978         TRACE("Not an overlay surface.\n");
2979         return WINEDDERR_NOTAOVERLAYSURFACE;
2980     }
2981
2982     return WINED3D_OK;
2983 }
2984
2985 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
2986         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
2987 {
2988     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
2989             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
2990
2991     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2992     {
2993         WARN("Not an overlay surface.\n");
2994         return WINEDDERR_NOTAOVERLAYSURFACE;
2995     }
2996     else if (!dst_surface)
2997     {
2998         WARN("Dest surface is NULL.\n");
2999         return WINED3DERR_INVALIDCALL;
3000     }
3001
3002     if (src_rect)
3003     {
3004         surface->overlay_srcrect = *src_rect;
3005     }
3006     else
3007     {
3008         surface->overlay_srcrect.left = 0;
3009         surface->overlay_srcrect.top = 0;
3010         surface->overlay_srcrect.right = surface->resource.width;
3011         surface->overlay_srcrect.bottom = surface->resource.height;
3012     }
3013
3014     if (dst_rect)
3015     {
3016         surface->overlay_destrect = *dst_rect;
3017     }
3018     else
3019     {
3020         surface->overlay_destrect.left = 0;
3021         surface->overlay_destrect.top = 0;
3022         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3023         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3024     }
3025
3026     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3027     {
3028         list_remove(&surface->overlay_entry);
3029     }
3030
3031     if (flags & WINEDDOVER_SHOW)
3032     {
3033         if (surface->overlay_dest != dst_surface)
3034         {
3035             surface->overlay_dest = dst_surface;
3036             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3037         }
3038     }
3039     else if (flags & WINEDDOVER_HIDE)
3040     {
3041         /* tests show that the rectangles are erased on hide */
3042         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3043         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3044         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3045         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3046         surface->overlay_dest = NULL;
3047     }
3048
3049     surface->surface_ops->surface_draw_overlay(surface);
3050
3051     return WINED3D_OK;
3052 }
3053
3054 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3055 {
3056     TRACE("surface %p, clipper %p.\n", surface, clipper);
3057
3058     surface->clipper = clipper;
3059
3060     return WINED3D_OK;
3061 }
3062
3063 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3064 {
3065     TRACE("surface %p.\n", surface);
3066
3067     return surface->clipper;
3068 }
3069
3070 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3071 {
3072     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3073
3074     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3075
3076     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3077     {
3078         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3079         return WINED3DERR_INVALIDCALL;
3080     }
3081
3082     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3083             surface->pow2Width, surface->pow2Height);
3084     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3085     surface->resource.format = format;
3086
3087     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3088     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3089             format->glFormat, format->glInternal, format->glType);
3090
3091     return WINED3D_OK;
3092 }
3093
3094 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3095         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3096 {
3097     unsigned short *dst_s;
3098     const float *src_f;
3099     unsigned int x, y;
3100
3101     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3102
3103     for (y = 0; y < h; ++y)
3104     {
3105         src_f = (const float *)(src + y * pitch_in);
3106         dst_s = (unsigned short *) (dst + y * pitch_out);
3107         for (x = 0; x < w; ++x)
3108         {
3109             dst_s[x] = float_32_to_16(src_f + x);
3110         }
3111     }
3112 }
3113
3114 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3115         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3116 {
3117     static const unsigned char convert_5to8[] =
3118     {
3119         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3120         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3121         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3122         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3123     };
3124     static const unsigned char convert_6to8[] =
3125     {
3126         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3127         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3128         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3129         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3130         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3131         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3132         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3133         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3134     };
3135     unsigned int x, y;
3136
3137     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3138
3139     for (y = 0; y < h; ++y)
3140     {
3141         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3142         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3143         for (x = 0; x < w; ++x)
3144         {
3145             WORD pixel = src_line[x];
3146             dst_line[x] = 0xff000000
3147                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3148                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3149                     | convert_5to8[(pixel & 0x001f)];
3150         }
3151     }
3152 }
3153
3154 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3155         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3156 {
3157     unsigned int x, y;
3158
3159     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3160
3161     for (y = 0; y < h; ++y)
3162     {
3163         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3164         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3165
3166         for (x = 0; x < w; ++x)
3167         {
3168             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3169         }
3170     }
3171 }
3172
3173 static inline BYTE cliptobyte(int x)
3174 {
3175     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3176 }
3177
3178 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3179         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3180 {
3181     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3182     unsigned int x, y;
3183
3184     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3185
3186     for (y = 0; y < h; ++y)
3187     {
3188         const BYTE *src_line = src + y * pitch_in;
3189         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3190         for (x = 0; x < w; ++x)
3191         {
3192             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3193              *     C = Y - 16; D = U - 128; E = V - 128;
3194              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3195              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3196              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3197              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3198              * U and V are shared between the pixels. */
3199             if (!(x & 1)) /* For every even pixel, read new U and V. */
3200             {
3201                 d = (int) src_line[1] - 128;
3202                 e = (int) src_line[3] - 128;
3203                 r2 = 409 * e + 128;
3204                 g2 = - 100 * d - 208 * e + 128;
3205                 b2 = 516 * d + 128;
3206             }
3207             c2 = 298 * ((int) src_line[0] - 16);
3208             dst_line[x] = 0xff000000
3209                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3210                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3211                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3212                 /* Scale RGB values to 0..255 range,
3213                  * then clip them if still not in range (may be negative),
3214                  * then shift them within DWORD if necessary. */
3215             src_line += 2;
3216         }
3217     }
3218 }
3219
3220 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3221         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3222 {
3223     unsigned int x, y;
3224     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3225
3226     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3227
3228     for (y = 0; y < h; ++y)
3229     {
3230         const BYTE *src_line = src + y * pitch_in;
3231         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3232         for (x = 0; x < w; ++x)
3233         {
3234             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3235              *     C = Y - 16; D = U - 128; E = V - 128;
3236              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3237              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3238              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3239              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3240              * U and V are shared between the pixels. */
3241             if (!(x & 1)) /* For every even pixel, read new U and V. */
3242             {
3243                 d = (int) src_line[1] - 128;
3244                 e = (int) src_line[3] - 128;
3245                 r2 = 409 * e + 128;
3246                 g2 = - 100 * d - 208 * e + 128;
3247                 b2 = 516 * d + 128;
3248             }
3249             c2 = 298 * ((int) src_line[0] - 16);
3250             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3251                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3252                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3253                 /* Scale RGB values to 0..255 range,
3254                  * then clip them if still not in range (may be negative),
3255                  * then shift them within DWORD if necessary. */
3256             src_line += 2;
3257         }
3258     }
3259 }
3260
3261 struct d3dfmt_convertor_desc
3262 {
3263     enum wined3d_format_id from, to;
3264     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3265 };
3266
3267 static const struct d3dfmt_convertor_desc convertors[] =
3268 {
3269     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3270     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3271     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3272     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3273     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3274 };
3275
3276 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3277         enum wined3d_format_id to)
3278 {
3279     unsigned int i;
3280
3281     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3282     {
3283         if (convertors[i].from == from && convertors[i].to == to)
3284             return &convertors[i];
3285     }
3286
3287     return NULL;
3288 }
3289
3290 /*****************************************************************************
3291  * surface_convert_format
3292  *
3293  * Creates a duplicate of a surface in a different format. Is used by Blt to
3294  * blit between surfaces with different formats.
3295  *
3296  * Parameters
3297  *  source: Source surface
3298  *  fmt: Requested destination format
3299  *
3300  *****************************************************************************/
3301 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3302 {
3303     const struct d3dfmt_convertor_desc *conv;
3304     WINED3DLOCKED_RECT lock_src, lock_dst;
3305     struct wined3d_surface *ret = NULL;
3306     HRESULT hr;
3307
3308     conv = find_convertor(source->resource.format->id, to_fmt);
3309     if (!conv)
3310     {
3311         FIXME("Cannot find a conversion function from format %s to %s.\n",
3312                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3313         return NULL;
3314     }
3315
3316     wined3d_surface_create(source->resource.device, source->resource.width,
3317             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3318             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3319             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3320     if (!ret)
3321     {
3322         ERR("Failed to create a destination surface for conversion.\n");
3323         return NULL;
3324     }
3325
3326     memset(&lock_src, 0, sizeof(lock_src));
3327     memset(&lock_dst, 0, sizeof(lock_dst));
3328
3329     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3330     if (FAILED(hr))
3331     {
3332         ERR("Failed to lock the source surface.\n");
3333         wined3d_surface_decref(ret);
3334         return NULL;
3335     }
3336     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3337     if (FAILED(hr))
3338     {
3339         ERR("Failed to lock the destination surface.\n");
3340         wined3d_surface_unmap(source);
3341         wined3d_surface_decref(ret);
3342         return NULL;
3343     }
3344
3345     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3346             source->resource.width, source->resource.height);
3347
3348     wined3d_surface_unmap(ret);
3349     wined3d_surface_unmap(source);
3350
3351     return ret;
3352 }
3353
3354 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3355         unsigned int bpp, UINT pitch, DWORD color)
3356 {
3357     BYTE *first;
3358     int x, y;
3359
3360     /* Do first row */
3361
3362 #define COLORFILL_ROW(type) \
3363 do { \
3364     type *d = (type *)buf; \
3365     for (x = 0; x < width; ++x) \
3366         d[x] = (type)color; \
3367 } while(0)
3368
3369     switch (bpp)
3370     {
3371         case 1:
3372             COLORFILL_ROW(BYTE);
3373             break;
3374
3375         case 2:
3376             COLORFILL_ROW(WORD);
3377             break;
3378
3379         case 3:
3380         {
3381             BYTE *d = buf;
3382             for (x = 0; x < width; ++x, d += 3)
3383             {
3384                 d[0] = (color      ) & 0xFF;
3385                 d[1] = (color >>  8) & 0xFF;
3386                 d[2] = (color >> 16) & 0xFF;
3387             }
3388             break;
3389         }
3390         case 4:
3391             COLORFILL_ROW(DWORD);
3392             break;
3393
3394         default:
3395             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3396             return WINED3DERR_NOTAVAILABLE;
3397     }
3398
3399 #undef COLORFILL_ROW
3400
3401     /* Now copy first row. */
3402     first = buf;
3403     for (y = 1; y < height; ++y)
3404     {
3405         buf += pitch;
3406         memcpy(buf, first, width * bpp);
3407     }
3408
3409     return WINED3D_OK;
3410 }
3411
3412 /* Do not call while under the GL lock. */
3413 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
3414         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
3415         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
3416 {
3417     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
3418             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
3419             flags, fx, debug_d3dtexturefiltertype(filter));
3420
3421     return dst_surface->surface_ops->surface_blt(dst_surface,
3422             dst_rect, src_surface, src_rect, flags, fx, filter);
3423 }
3424
3425 /* Do not call while under the GL lock. */
3426 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
3427         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
3428 {
3429     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, trans %#x.\n",
3430             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
3431
3432     return dst_surface->surface_ops->surface_bltfast(dst_surface,
3433             dst_x, dst_y, src_surface, src_rect, trans);
3434 }
3435
3436 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3437 {
3438     TRACE("surface %p.\n", surface);
3439
3440     if (!(surface->flags & SFLAG_LOCKED))
3441     {
3442         WARN("Trying to unmap unmapped surface.\n");
3443         return WINEDDERR_NOTLOCKED;
3444     }
3445     surface->flags &= ~SFLAG_LOCKED;
3446
3447     surface->surface_ops->surface_unmap(surface);
3448
3449     return WINED3D_OK;
3450 }
3451
3452 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3453         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3454 {
3455     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3456             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3457
3458     if (surface->flags & SFLAG_LOCKED)
3459     {
3460         WARN("Surface is already mapped.\n");
3461         return WINED3DERR_INVALIDCALL;
3462     }
3463     surface->flags |= SFLAG_LOCKED;
3464
3465     if (!(surface->flags & SFLAG_LOCKABLE))
3466         WARN("Trying to lock unlockable surface.\n");
3467
3468     surface->surface_ops->surface_map(surface, rect, flags);
3469
3470     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3471
3472     if (!rect)
3473     {
3474         locked_rect->pBits = surface->resource.allocatedMemory;
3475         surface->lockedRect.left = 0;
3476         surface->lockedRect.top = 0;
3477         surface->lockedRect.right = surface->resource.width;
3478         surface->lockedRect.bottom = surface->resource.height;
3479     }
3480     else
3481     {
3482         const struct wined3d_format *format = surface->resource.format;
3483
3484         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3485         {
3486             /* Compressed textures are block based, so calculate the offset of
3487              * the block that contains the top-left pixel of the locked rectangle. */
3488             locked_rect->pBits = surface->resource.allocatedMemory
3489                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3490                     + ((rect->left / format->block_width) * format->block_byte_count);
3491         }
3492         else
3493         {
3494             locked_rect->pBits = surface->resource.allocatedMemory
3495                     + (locked_rect->Pitch * rect->top)
3496                     + (rect->left * format->byte_count);
3497         }
3498         surface->lockedRect.left = rect->left;
3499         surface->lockedRect.top = rect->top;
3500         surface->lockedRect.right = rect->right;
3501         surface->lockedRect.bottom = rect->bottom;
3502     }
3503
3504     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3505     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3506
3507     return WINED3D_OK;
3508 }
3509
3510 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3511 {
3512     HRESULT hr;
3513
3514     TRACE("surface %p, dc %p.\n", surface, dc);
3515
3516     if (surface->flags & SFLAG_USERPTR)
3517     {
3518         ERR("Not supported on surfaces with application-provided memory.\n");
3519         return WINEDDERR_NODC;
3520     }
3521
3522     /* Give more detailed info for ddraw. */
3523     if (surface->flags & SFLAG_DCINUSE)
3524         return WINEDDERR_DCALREADYCREATED;
3525
3526     /* Can't GetDC if the surface is locked. */
3527     if (surface->flags & SFLAG_LOCKED)
3528         return WINED3DERR_INVALIDCALL;
3529
3530     hr = surface->surface_ops->surface_getdc(surface);
3531     if (FAILED(hr))
3532         return hr;
3533
3534     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3535             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3536     {
3537         /* GetDC on palettized formats is unsupported in D3D9, and the method
3538          * is missing in D3D8, so this should only be used for DX <=7
3539          * surfaces (with non-device palettes). */
3540         const PALETTEENTRY *pal = NULL;
3541
3542         if (surface->palette)
3543         {
3544             pal = surface->palette->palents;
3545         }
3546         else
3547         {
3548             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3549             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3550
3551             if (dds_primary && dds_primary->palette)
3552                 pal = dds_primary->palette->palents;
3553         }
3554
3555         if (pal)
3556         {
3557             RGBQUAD col[256];
3558             unsigned int i;
3559
3560             for (i = 0; i < 256; ++i)
3561             {
3562                 col[i].rgbRed = pal[i].peRed;
3563                 col[i].rgbGreen = pal[i].peGreen;
3564                 col[i].rgbBlue = pal[i].peBlue;
3565                 col[i].rgbReserved = 0;
3566             }
3567             SetDIBColorTable(surface->hDC, 0, 256, col);
3568         }
3569     }
3570
3571     surface->flags |= SFLAG_DCINUSE;
3572
3573     *dc = surface->hDC;
3574     TRACE("Returning dc %p.\n", *dc);
3575
3576     return WINED3D_OK;
3577 }
3578
3579 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3580 {
3581     TRACE("surface %p, dc %p.\n", surface, dc);
3582
3583     if (!(surface->flags & SFLAG_DCINUSE))
3584         return WINEDDERR_NODC;
3585
3586     if (surface->hDC != dc)
3587     {
3588         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3589                 dc, surface->hDC);
3590         return WINEDDERR_NODC;
3591     }
3592
3593     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3594     {
3595         /* Copy the contents of the DIB over to the PBO. */
3596         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->dib.bitmap_size);
3597     }
3598
3599     /* We locked first, so unlock now. */
3600     wined3d_surface_unmap(surface);
3601
3602     surface->flags &= ~SFLAG_DCINUSE;
3603
3604     return WINED3D_OK;
3605 }
3606
3607 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3608 {
3609     struct wined3d_swapchain *swapchain;
3610     HRESULT hr;
3611
3612     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3613
3614     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
3615     {
3616         ERR("Flipped surface is not on a swapchain.\n");
3617         return WINEDDERR_NOTFLIPPABLE;
3618     }
3619     swapchain = surface->container.u.swapchain;
3620
3621     hr = surface->surface_ops->surface_flip(surface, override);
3622     if (FAILED(hr))
3623         return hr;
3624
3625     /* Just overwrite the swapchain presentation interval. This is ok because
3626      * only ddraw apps can call Flip, and only d3d8 and d3d9 applications
3627      * specify the presentation interval. */
3628     if (!(flags & (WINEDDFLIP_NOVSYNC | WINEDDFLIP_INTERVAL2 | WINEDDFLIP_INTERVAL3 | WINEDDFLIP_INTERVAL4)))
3629         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_ONE;
3630     else if (flags & WINEDDFLIP_NOVSYNC)
3631         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
3632     else if (flags & WINEDDFLIP_INTERVAL2)
3633         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_TWO;
3634     else if (flags & WINEDDFLIP_INTERVAL3)
3635         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_THREE;
3636     else
3637         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_FOUR;
3638
3639     return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
3640 }
3641
3642 /* Do not call while under the GL lock. */
3643 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3644 {
3645     struct wined3d_device *device = surface->resource.device;
3646
3647     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3648
3649     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3650     {
3651         struct wined3d_texture *texture = surface->container.u.texture;
3652
3653         TRACE("Passing to container (%p).\n", texture);
3654         texture->texture_ops->texture_preload(texture, srgb);
3655     }
3656     else
3657     {
3658         struct wined3d_context *context = NULL;
3659
3660         TRACE("(%p) : About to load surface\n", surface);
3661
3662         if (!device->isInDraw) context = context_acquire(device, NULL);
3663
3664         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3665                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3666         {
3667             if (palette9_changed(surface))
3668             {
3669                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3670                 /* TODO: This is not necessarily needed with hw palettized texture support */
3671                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3672                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3673                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3674             }
3675         }
3676
3677         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3678
3679         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3680         {
3681             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3682             GLclampf tmp;
3683             tmp = 0.9f;
3684             ENTER_GL();
3685             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3686             LEAVE_GL();
3687         }
3688
3689         if (context) context_release(context);
3690     }
3691 }
3692
3693 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3694 {
3695     if (!surface->resource.allocatedMemory)
3696     {
3697         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3698                 surface->resource.size + RESOURCE_ALIGNMENT);
3699         if (!surface->resource.heapMemory)
3700         {
3701             ERR("Out of memory\n");
3702             return FALSE;
3703         }
3704         surface->resource.allocatedMemory =
3705             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3706     }
3707     else
3708     {
3709         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3710     }
3711
3712     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3713
3714     return TRUE;
3715 }
3716
3717 /* Read the framebuffer back into the surface */
3718 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3719 {
3720     struct wined3d_device *device = surface->resource.device;
3721     const struct wined3d_gl_info *gl_info;
3722     struct wined3d_context *context;
3723     BYTE *mem;
3724     GLint fmt;
3725     GLint type;
3726     BYTE *row, *top, *bottom;
3727     int i;
3728     BOOL bpp;
3729     RECT local_rect;
3730     BOOL srcIsUpsideDown;
3731     GLint rowLen = 0;
3732     GLint skipPix = 0;
3733     GLint skipRow = 0;
3734
3735     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3736         static BOOL warned = FALSE;
3737         if(!warned) {
3738             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3739             warned = TRUE;
3740         }
3741         return;
3742     }
3743
3744     context = context_acquire(device, surface);
3745     context_apply_blit_state(context, device);
3746     gl_info = context->gl_info;
3747
3748     ENTER_GL();
3749
3750     /* Select the correct read buffer, and give some debug output.
3751      * There is no need to keep track of the current read buffer or reset it, every part of the code
3752      * that reads sets the read buffer as desired.
3753      */
3754     if (surface_is_offscreen(surface))
3755     {
3756         /* Mapping the primary render target which is not on a swapchain.
3757          * Read from the back buffer. */
3758         TRACE("Mapping offscreen render target.\n");
3759         glReadBuffer(device->offscreenBuffer);
3760         srcIsUpsideDown = TRUE;
3761     }
3762     else
3763     {
3764         /* Onscreen surfaces are always part of a swapchain */
3765         GLenum buffer = surface_get_gl_buffer(surface);
3766         TRACE("Mapping %#x buffer.\n", buffer);
3767         glReadBuffer(buffer);
3768         checkGLcall("glReadBuffer");
3769         srcIsUpsideDown = FALSE;
3770     }
3771
3772     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3773     if (!rect)
3774     {
3775         local_rect.left = 0;
3776         local_rect.top = 0;
3777         local_rect.right = surface->resource.width;
3778         local_rect.bottom = surface->resource.height;
3779     }
3780     else
3781     {
3782         local_rect = *rect;
3783     }
3784     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3785
3786     switch (surface->resource.format->id)
3787     {
3788         case WINED3DFMT_P8_UINT:
3789         {
3790             if (primary_render_target_is_p8(device))
3791             {
3792                 /* In case of P8 render targets the index is stored in the alpha component */
3793                 fmt = GL_ALPHA;
3794                 type = GL_UNSIGNED_BYTE;
3795                 mem = dest;
3796                 bpp = surface->resource.format->byte_count;
3797             }
3798             else
3799             {
3800                 /* GL can't return palettized data, so read ARGB pixels into a
3801                  * separate block of memory and convert them into palettized format
3802                  * in software. Slow, but if the app means to use palettized render
3803                  * targets and locks it...
3804                  *
3805                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
3806                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
3807                  * for the color channels when palettizing the colors.
3808                  */
3809                 fmt = GL_RGB;
3810                 type = GL_UNSIGNED_BYTE;
3811                 pitch *= 3;
3812                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
3813                 if (!mem)
3814                 {
3815                     ERR("Out of memory\n");
3816                     LEAVE_GL();
3817                     return;
3818                 }
3819                 bpp = surface->resource.format->byte_count * 3;
3820             }
3821         }
3822         break;
3823
3824         default:
3825             mem = dest;
3826             fmt = surface->resource.format->glFormat;
3827             type = surface->resource.format->glType;
3828             bpp = surface->resource.format->byte_count;
3829     }
3830
3831     if (surface->flags & SFLAG_PBO)
3832     {
3833         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
3834         checkGLcall("glBindBufferARB");
3835         if (mem)
3836         {
3837             ERR("mem not null for pbo -- unexpected\n");
3838             mem = NULL;
3839         }
3840     }
3841
3842     /* Save old pixel store pack state */
3843     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
3844     checkGLcall("glGetIntegerv");
3845     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
3846     checkGLcall("glGetIntegerv");
3847     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
3848     checkGLcall("glGetIntegerv");
3849
3850     /* Setup pixel store pack state -- to glReadPixels into the correct place */
3851     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
3852     checkGLcall("glPixelStorei");
3853     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
3854     checkGLcall("glPixelStorei");
3855     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
3856     checkGLcall("glPixelStorei");
3857
3858     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
3859             local_rect.right - local_rect.left,
3860             local_rect.bottom - local_rect.top,
3861             fmt, type, mem);
3862     checkGLcall("glReadPixels");
3863
3864     /* Reset previous pixel store pack state */
3865     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
3866     checkGLcall("glPixelStorei");
3867     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
3868     checkGLcall("glPixelStorei");
3869     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
3870     checkGLcall("glPixelStorei");
3871
3872     if (surface->flags & SFLAG_PBO)
3873     {
3874         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
3875         checkGLcall("glBindBufferARB");
3876
3877         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
3878          * to get a pointer to it and perform the flipping in software. This is a lot
3879          * faster than calling glReadPixels for each line. In case we want more speed
3880          * we should rerender it flipped in a FBO and read the data back from the FBO. */
3881         if (!srcIsUpsideDown)
3882         {
3883             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
3884             checkGLcall("glBindBufferARB");
3885
3886             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
3887             checkGLcall("glMapBufferARB");
3888         }
3889     }
3890
3891     /* TODO: Merge this with the palettization loop below for P8 targets */
3892     if(!srcIsUpsideDown) {
3893         UINT len, off;
3894         /* glReadPixels returns the image upside down, and there is no way to prevent this.
3895             Flip the lines in software */
3896         len = (local_rect.right - local_rect.left) * bpp;
3897         off = local_rect.left * bpp;
3898
3899         row = HeapAlloc(GetProcessHeap(), 0, len);
3900         if(!row) {
3901             ERR("Out of memory\n");
3902             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
3903                 HeapFree(GetProcessHeap(), 0, mem);
3904             LEAVE_GL();
3905             return;
3906         }
3907
3908         top = mem + pitch * local_rect.top;
3909         bottom = mem + pitch * (local_rect.bottom - 1);
3910         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
3911             memcpy(row, top + off, len);
3912             memcpy(top + off, bottom + off, len);
3913             memcpy(bottom + off, row, len);
3914             top += pitch;
3915             bottom -= pitch;
3916         }
3917         HeapFree(GetProcessHeap(), 0, row);
3918
3919         /* Unmap the temp PBO buffer */
3920         if (surface->flags & SFLAG_PBO)
3921         {
3922             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
3923             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
3924         }
3925     }
3926
3927     LEAVE_GL();
3928     context_release(context);
3929
3930     /* For P8 textures we need to perform an inverse palette lookup. This is
3931      * done by searching for a palette index which matches the RGB value.
3932      * Note this isn't guaranteed to work when there are multiple entries for
3933      * the same color but we have no choice. In case of P8 render targets,
3934      * the index is stored in the alpha component so no conversion is needed. */
3935     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
3936     {
3937         const PALETTEENTRY *pal = NULL;
3938         DWORD width = pitch / 3;
3939         int x, y, c;
3940
3941         if (surface->palette)
3942         {
3943             pal = surface->palette->palents;
3944         }
3945         else
3946         {
3947             ERR("Palette is missing, cannot perform inverse palette lookup\n");
3948             HeapFree(GetProcessHeap(), 0, mem);
3949             return;
3950         }
3951
3952         for(y = local_rect.top; y < local_rect.bottom; y++) {
3953             for(x = local_rect.left; x < local_rect.right; x++) {
3954                 /*                      start              lines            pixels      */
3955                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
3956                 const BYTE *green = blue  + 1;
3957                 const BYTE *red = green + 1;
3958
3959                 for(c = 0; c < 256; c++) {
3960                     if(*red   == pal[c].peRed   &&
3961                        *green == pal[c].peGreen &&
3962                        *blue  == pal[c].peBlue)
3963                     {
3964                         *((BYTE *) dest + y * width + x) = c;
3965                         break;
3966                     }
3967                 }
3968             }
3969         }
3970         HeapFree(GetProcessHeap(), 0, mem);
3971     }
3972 }
3973
3974 /* Read the framebuffer contents into a texture */
3975 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
3976 {
3977     struct wined3d_device *device = surface->resource.device;
3978     const struct wined3d_gl_info *gl_info;
3979     struct wined3d_context *context;
3980
3981     if (!surface_is_offscreen(surface))
3982     {
3983         /* We would need to flip onscreen surfaces, but there's no efficient
3984          * way to do that here. It makes more sense for the caller to
3985          * explicitly go through sysmem. */
3986         ERR("Not supported for onscreen targets.\n");
3987         return;
3988     }
3989
3990     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
3991      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
3992      * states in the stateblock, and no driver was found yet that had bugs in that regard.
3993      */
3994     context = context_acquire(device, surface);
3995     gl_info = context->gl_info;
3996
3997     surface_prepare_texture(surface, gl_info, srgb);
3998     surface_bind_and_dirtify(surface, gl_info, srgb);
3999
4000     TRACE("Reading back offscreen render target %p.\n", surface);
4001
4002     ENTER_GL();
4003
4004     glReadBuffer(device->offscreenBuffer);
4005     checkGLcall("glReadBuffer");
4006
4007     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4008             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4009     checkGLcall("glCopyTexSubImage2D");
4010
4011     LEAVE_GL();
4012
4013     context_release(context);
4014 }
4015
4016 /* Context activation is done by the caller. */
4017 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4018         const struct wined3d_gl_info *gl_info, BOOL srgb)
4019 {
4020     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4021     CONVERT_TYPES convert;
4022     struct wined3d_format format;
4023
4024     if (surface->flags & alloc_flag) return;
4025
4026     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4027     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4028     else surface->flags &= ~SFLAG_CONVERTED;
4029
4030     surface_bind_and_dirtify(surface, gl_info, srgb);
4031     surface_allocate_surface(surface, gl_info, &format, srgb);
4032     surface->flags |= alloc_flag;
4033 }
4034
4035 /* Context activation is done by the caller. */
4036 void surface_prepare_texture(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
4037 {
4038     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4039     {
4040         struct wined3d_texture *texture = surface->container.u.texture;
4041         UINT sub_count = texture->level_count * texture->layer_count;
4042         UINT i;
4043
4044         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4045
4046         for (i = 0; i < sub_count; ++i)
4047         {
4048             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4049             surface_prepare_texture_internal(s, gl_info, srgb);
4050         }
4051
4052         return;
4053     }
4054
4055     surface_prepare_texture_internal(surface, gl_info, srgb);
4056 }
4057
4058 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4059         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4060 {
4061     struct wined3d_device *device = surface->resource.device;
4062     UINT pitch = wined3d_surface_get_pitch(surface);
4063     const struct wined3d_gl_info *gl_info;
4064     struct wined3d_context *context;
4065     RECT local_rect;
4066     UINT w, h;
4067
4068     surface_get_rect(surface, rect, &local_rect);
4069
4070     mem += local_rect.top * pitch + local_rect.left * bpp;
4071     w = local_rect.right - local_rect.left;
4072     h = local_rect.bottom - local_rect.top;
4073
4074     /* Activate the correct context for the render target */
4075     context = context_acquire(device, surface);
4076     context_apply_blit_state(context, device);
4077     gl_info = context->gl_info;
4078
4079     ENTER_GL();
4080
4081     if (!surface_is_offscreen(surface))
4082     {
4083         GLenum buffer = surface_get_gl_buffer(surface);
4084         TRACE("Unlocking %#x buffer.\n", buffer);
4085         context_set_draw_buffer(context, buffer);
4086
4087         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4088         glPixelZoom(1.0f, -1.0f);
4089     }
4090     else
4091     {
4092         /* Primary offscreen render target */
4093         TRACE("Offscreen render target.\n");
4094         context_set_draw_buffer(context, device->offscreenBuffer);
4095
4096         glPixelZoom(1.0f, 1.0f);
4097     }
4098
4099     glRasterPos3i(local_rect.left, local_rect.top, 1);
4100     checkGLcall("glRasterPos3i");
4101
4102     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4103     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4104
4105     if (surface->flags & SFLAG_PBO)
4106     {
4107         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4108         checkGLcall("glBindBufferARB");
4109     }
4110
4111     glDrawPixels(w, h, fmt, type, mem);
4112     checkGLcall("glDrawPixels");
4113
4114     if (surface->flags & SFLAG_PBO)
4115     {
4116         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4117         checkGLcall("glBindBufferARB");
4118     }
4119
4120     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4121     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4122
4123     LEAVE_GL();
4124
4125     if (wined3d_settings.strict_draw_ordering
4126             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4127             && surface->container.u.swapchain->front_buffer == surface))
4128         wglFlush();
4129
4130     context_release(context);
4131 }
4132
4133 HRESULT d3dfmt_get_conv(struct wined3d_surface *surface, BOOL need_alpha_ck,
4134         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4135 {
4136     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4137     struct wined3d_device *device = surface->resource.device;
4138     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4139     BOOL blit_supported = FALSE;
4140
4141     /* Copy the default values from the surface. Below we might perform fixups */
4142     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4143     *format = *surface->resource.format;
4144     *convert = NO_CONVERSION;
4145
4146     /* Ok, now look if we have to do any conversion */
4147     switch (surface->resource.format->id)
4148     {
4149         case WINED3DFMT_P8_UINT:
4150             /* Below the call to blit_supported is disabled for Wine 1.2
4151              * because the function isn't operating correctly yet. At the
4152              * moment 8-bit blits are handled in software and if certain GL
4153              * extensions are around, surface conversion is performed at
4154              * upload time. The blit_supported call recognizes it as a
4155              * destination fixup. This type of upload 'fixup' and 8-bit to
4156              * 8-bit blits need to be handled by the blit_shader.
4157              * TODO: get rid of this #if 0. */
4158 #if 0
4159             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4160                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4161                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4162 #endif
4163             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4164
4165             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4166              * texturing. Further also use conversion in case of color keying.
4167              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4168              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4169              * conflicts with this.
4170              */
4171             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4172                     || colorkey_active || !use_texturing)
4173             {
4174                 format->glFormat = GL_RGBA;
4175                 format->glInternal = GL_RGBA;
4176                 format->glType = GL_UNSIGNED_BYTE;
4177                 format->conv_byte_count = 4;
4178                 if (colorkey_active)
4179                     *convert = CONVERT_PALETTED_CK;
4180                 else
4181                     *convert = CONVERT_PALETTED;
4182             }
4183             break;
4184
4185         case WINED3DFMT_B2G3R3_UNORM:
4186             /* **********************
4187                 GL_UNSIGNED_BYTE_3_3_2
4188                 ********************** */
4189             if (colorkey_active) {
4190                 /* This texture format will never be used.. So do not care about color keying
4191                     up until the point in time it will be needed :-) */
4192                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4193             }
4194             break;
4195
4196         case WINED3DFMT_B5G6R5_UNORM:
4197             if (colorkey_active)
4198             {
4199                 *convert = CONVERT_CK_565;
4200                 format->glFormat = GL_RGBA;
4201                 format->glInternal = GL_RGB5_A1;
4202                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4203                 format->conv_byte_count = 2;
4204             }
4205             break;
4206
4207         case WINED3DFMT_B5G5R5X1_UNORM:
4208             if (colorkey_active)
4209             {
4210                 *convert = CONVERT_CK_5551;
4211                 format->glFormat = GL_BGRA;
4212                 format->glInternal = GL_RGB5_A1;
4213                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4214                 format->conv_byte_count = 2;
4215             }
4216             break;
4217
4218         case WINED3DFMT_B8G8R8_UNORM:
4219             if (colorkey_active)
4220             {
4221                 *convert = CONVERT_CK_RGB24;
4222                 format->glFormat = GL_RGBA;
4223                 format->glInternal = GL_RGBA8;
4224                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4225                 format->conv_byte_count = 4;
4226             }
4227             break;
4228
4229         case WINED3DFMT_B8G8R8X8_UNORM:
4230             if (colorkey_active)
4231             {
4232                 *convert = CONVERT_RGB32_888;
4233                 format->glFormat = GL_RGBA;
4234                 format->glInternal = GL_RGBA8;
4235                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4236                 format->conv_byte_count = 4;
4237             }
4238             break;
4239
4240         default:
4241             break;
4242     }
4243
4244     return WINED3D_OK;
4245 }
4246
4247 void d3dfmt_p8_init_palette(struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4248 {
4249     struct wined3d_device *device = surface->resource.device;
4250     struct wined3d_palette *pal = surface->palette;
4251     BOOL index_in_alpha = FALSE;
4252     unsigned int i;
4253
4254     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4255      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4256      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4257      * duplicate entries. Store the color key in the unused alpha component to speed the
4258      * download up and to make conversion unneeded. */
4259     index_in_alpha = primary_render_target_is_p8(device);
4260
4261     if (!pal)
4262     {
4263         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4264         if (device->wined3d->flags & WINED3D_PALETTE_PER_SURFACE)
4265         {
4266             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4267             if (index_in_alpha)
4268             {
4269                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4270                  * there's no palette at this time. */
4271                 for (i = 0; i < 256; i++) table[i][3] = i;
4272             }
4273         }
4274         else
4275         {
4276             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4277              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4278              * capability flag is present (wine does advertise this capability) */
4279             for (i = 0; i < 256; ++i)
4280             {
4281                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4282                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4283                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4284                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4285             }
4286         }
4287     }
4288     else
4289     {
4290         TRACE("Using surface palette %p\n", pal);
4291         /* Get the surface's palette */
4292         for (i = 0; i < 256; ++i)
4293         {
4294             table[i][0] = pal->palents[i].peRed;
4295             table[i][1] = pal->palents[i].peGreen;
4296             table[i][2] = pal->palents[i].peBlue;
4297
4298             /* When index_in_alpha is set the palette index is stored in the
4299              * alpha component. In case of a readback we can then read
4300              * GL_ALPHA. Color keying is handled in BltOverride using a
4301              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4302              * color key itself is passed to glAlphaFunc in other cases the
4303              * alpha component of pixels that should be masked away is set to 0. */
4304             if (index_in_alpha)
4305             {
4306                 table[i][3] = i;
4307             }
4308             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4309                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4310             {
4311                 table[i][3] = 0x00;
4312             }
4313             else if (pal->flags & WINEDDPCAPS_ALPHA)
4314             {
4315                 table[i][3] = pal->palents[i].peFlags;
4316             }
4317             else
4318             {
4319                 table[i][3] = 0xFF;
4320             }
4321         }
4322     }
4323 }
4324
4325 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4326         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4327 {
4328     const BYTE *source;
4329     BYTE *dest;
4330     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4331
4332     switch (convert) {
4333         case NO_CONVERSION:
4334         {
4335             memcpy(dst, src, pitch * height);
4336             break;
4337         }
4338         case CONVERT_PALETTED:
4339         case CONVERT_PALETTED_CK:
4340         {
4341             BYTE table[256][4];
4342             unsigned int x, y;
4343
4344             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4345
4346             for (y = 0; y < height; y++)
4347             {
4348                 source = src + pitch * y;
4349                 dest = dst + outpitch * y;
4350                 /* This is an 1 bpp format, using the width here is fine */
4351                 for (x = 0; x < width; x++) {
4352                     BYTE color = *source++;
4353                     *dest++ = table[color][0];
4354                     *dest++ = table[color][1];
4355                     *dest++ = table[color][2];
4356                     *dest++ = table[color][3];
4357                 }
4358             }
4359         }
4360         break;
4361
4362         case CONVERT_CK_565:
4363         {
4364             /* Converting the 565 format in 5551 packed to emulate color-keying.
4365
4366               Note : in all these conversion, it would be best to average the averaging
4367                       pixels to get the color of the pixel that will be color-keyed to
4368                       prevent 'color bleeding'. This will be done later on if ever it is
4369                       too visible.
4370
4371               Note2: Nvidia documents say that their driver does not support alpha + color keying
4372                      on the same surface and disables color keying in such a case
4373             */
4374             unsigned int x, y;
4375             const WORD *Source;
4376             WORD *Dest;
4377
4378             TRACE("Color keyed 565\n");
4379
4380             for (y = 0; y < height; y++) {
4381                 Source = (const WORD *)(src + y * pitch);
4382                 Dest = (WORD *) (dst + y * outpitch);
4383                 for (x = 0; x < width; x++ ) {
4384                     WORD color = *Source++;
4385                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4386                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4387                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4388                         *Dest |= 0x0001;
4389                     Dest++;
4390                 }
4391             }
4392         }
4393         break;
4394
4395         case CONVERT_CK_5551:
4396         {
4397             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4398             unsigned int x, y;
4399             const WORD *Source;
4400             WORD *Dest;
4401             TRACE("Color keyed 5551\n");
4402             for (y = 0; y < height; y++) {
4403                 Source = (const WORD *)(src + y * pitch);
4404                 Dest = (WORD *) (dst + y * outpitch);
4405                 for (x = 0; x < width; x++ ) {
4406                     WORD color = *Source++;
4407                     *Dest = color;
4408                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4409                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4410                         *Dest |= (1 << 15);
4411                     else
4412                         *Dest &= ~(1 << 15);
4413                     Dest++;
4414                 }
4415             }
4416         }
4417         break;
4418
4419         case CONVERT_CK_RGB24:
4420         {
4421             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4422             unsigned int x, y;
4423             for (y = 0; y < height; y++)
4424             {
4425                 source = src + pitch * y;
4426                 dest = dst + outpitch * y;
4427                 for (x = 0; x < width; x++) {
4428                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4429                     DWORD dstcolor = color << 8;
4430                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4431                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4432                         dstcolor |= 0xff;
4433                     *(DWORD*)dest = dstcolor;
4434                     source += 3;
4435                     dest += 4;
4436                 }
4437             }
4438         }
4439         break;
4440
4441         case CONVERT_RGB32_888:
4442         {
4443             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4444             unsigned int x, y;
4445             for (y = 0; y < height; y++)
4446             {
4447                 source = src + pitch * y;
4448                 dest = dst + outpitch * y;
4449                 for (x = 0; x < width; x++) {
4450                     DWORD color = 0xffffff & *(const DWORD*)source;
4451                     DWORD dstcolor = color << 8;
4452                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4453                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4454                         dstcolor |= 0xff;
4455                     *(DWORD*)dest = dstcolor;
4456                     source += 4;
4457                     dest += 4;
4458                 }
4459             }
4460         }
4461         break;
4462
4463         default:
4464             ERR("Unsupported conversion type %#x.\n", convert);
4465     }
4466     return WINED3D_OK;
4467 }
4468
4469 BOOL palette9_changed(struct wined3d_surface *surface)
4470 {
4471     struct wined3d_device *device = surface->resource.device;
4472
4473     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4474             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4475     {
4476         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4477          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4478          */
4479         return FALSE;
4480     }
4481
4482     if (surface->palette9)
4483     {
4484         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4485         {
4486             return FALSE;
4487         }
4488     }
4489     else
4490     {
4491         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4492     }
4493     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4494
4495     return TRUE;
4496 }
4497
4498 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4499 {
4500     /* Flip the surface contents */
4501     /* Flip the DC */
4502     {
4503         HDC tmp;
4504         tmp = front->hDC;
4505         front->hDC = back->hDC;
4506         back->hDC = tmp;
4507     }
4508
4509     /* Flip the DIBsection */
4510     {
4511         HBITMAP tmp;
4512         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4513         tmp = front->dib.DIBsection;
4514         front->dib.DIBsection = back->dib.DIBsection;
4515         back->dib.DIBsection = tmp;
4516
4517         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4518         else front->flags &= ~SFLAG_DIBSECTION;
4519         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4520         else back->flags &= ~SFLAG_DIBSECTION;
4521     }
4522
4523     /* Flip the surface data */
4524     {
4525         void* tmp;
4526
4527         tmp = front->dib.bitmap_data;
4528         front->dib.bitmap_data = back->dib.bitmap_data;
4529         back->dib.bitmap_data = tmp;
4530
4531         tmp = front->resource.allocatedMemory;
4532         front->resource.allocatedMemory = back->resource.allocatedMemory;
4533         back->resource.allocatedMemory = tmp;
4534
4535         tmp = front->resource.heapMemory;
4536         front->resource.heapMemory = back->resource.heapMemory;
4537         back->resource.heapMemory = tmp;
4538     }
4539
4540     /* Flip the PBO */
4541     {
4542         GLuint tmp_pbo = front->pbo;
4543         front->pbo = back->pbo;
4544         back->pbo = tmp_pbo;
4545     }
4546
4547     /* client_memory should not be different, but just in case */
4548     {
4549         BOOL tmp;
4550         tmp = front->dib.client_memory;
4551         front->dib.client_memory = back->dib.client_memory;
4552         back->dib.client_memory = tmp;
4553     }
4554
4555     /* Flip the opengl texture */
4556     {
4557         GLuint tmp;
4558
4559         tmp = back->texture_name;
4560         back->texture_name = front->texture_name;
4561         front->texture_name = tmp;
4562
4563         tmp = back->texture_name_srgb;
4564         back->texture_name_srgb = front->texture_name_srgb;
4565         front->texture_name_srgb = tmp;
4566
4567         resource_unload(&back->resource);
4568         resource_unload(&front->resource);
4569     }
4570
4571     {
4572         DWORD tmp_flags = back->flags;
4573         back->flags = front->flags;
4574         front->flags = tmp_flags;
4575     }
4576 }
4577
4578 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4579  * pixel copy calls. */
4580 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4581         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4582 {
4583     struct wined3d_device *device = dst_surface->resource.device;
4584     float xrel, yrel;
4585     UINT row;
4586     struct wined3d_context *context;
4587     BOOL upsidedown = FALSE;
4588     RECT dst_rect = *dst_rect_in;
4589
4590     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4591      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4592      */
4593     if(dst_rect.top > dst_rect.bottom) {
4594         UINT tmp = dst_rect.bottom;
4595         dst_rect.bottom = dst_rect.top;
4596         dst_rect.top = tmp;
4597         upsidedown = TRUE;
4598     }
4599
4600     context = context_acquire(device, src_surface);
4601     context_apply_blit_state(context, device);
4602     surface_internal_preload(dst_surface, SRGB_RGB);
4603     ENTER_GL();
4604
4605     /* Bind the target texture */
4606     glBindTexture(dst_surface->texture_target, dst_surface->texture_name);
4607     checkGLcall("glBindTexture");
4608     if (surface_is_offscreen(src_surface))
4609     {
4610         TRACE("Reading from an offscreen target\n");
4611         upsidedown = !upsidedown;
4612         glReadBuffer(device->offscreenBuffer);
4613     }
4614     else
4615     {
4616         glReadBuffer(surface_get_gl_buffer(src_surface));
4617     }
4618     checkGLcall("glReadBuffer");
4619
4620     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4621     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4622
4623     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4624     {
4625         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4626
4627         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4628             ERR("Texture filtering not supported in direct blit\n");
4629         }
4630     }
4631     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4632             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4633     {
4634         ERR("Texture filtering not supported in direct blit\n");
4635     }
4636
4637     if (upsidedown
4638             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4639             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4640     {
4641         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4642
4643         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4644                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4645                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4646                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4647     }
4648     else
4649     {
4650         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4651         /* I have to process this row by row to swap the image,
4652          * otherwise it would be upside down, so stretching in y direction
4653          * doesn't cost extra time
4654          *
4655          * However, stretching in x direction can be avoided if not necessary
4656          */
4657         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4658             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4659             {
4660                 /* Well, that stuff works, but it's very slow.
4661                  * find a better way instead
4662                  */
4663                 UINT col;
4664
4665                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4666                 {
4667                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4668                             dst_rect.left + col /* x offset */, row /* y offset */,
4669                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4670                 }
4671             }
4672             else
4673             {
4674                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4675                         dst_rect.left /* x offset */, row /* y offset */,
4676                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4677             }
4678         }
4679     }
4680     checkGLcall("glCopyTexSubImage2D");
4681
4682     LEAVE_GL();
4683     context_release(context);
4684
4685     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4686      * path is never entered
4687      */
4688     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4689 }
4690
4691 /* Uses the hardware to stretch and flip the image */
4692 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4693         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4694 {
4695     struct wined3d_device *device = dst_surface->resource.device;
4696     struct wined3d_swapchain *src_swapchain = NULL;
4697     GLuint src, backup = 0;
4698     float left, right, top, bottom; /* Texture coordinates */
4699     UINT fbwidth = src_surface->resource.width;
4700     UINT fbheight = src_surface->resource.height;
4701     struct wined3d_context *context;
4702     GLenum drawBuffer = GL_BACK;
4703     GLenum texture_target;
4704     BOOL noBackBufferBackup;
4705     BOOL src_offscreen;
4706     BOOL upsidedown = FALSE;
4707     RECT dst_rect = *dst_rect_in;
4708
4709     TRACE("Using hwstretch blit\n");
4710     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4711     context = context_acquire(device, src_surface);
4712     context_apply_blit_state(context, device);
4713     surface_internal_preload(dst_surface, SRGB_RGB);
4714
4715     src_offscreen = surface_is_offscreen(src_surface);
4716     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4717     if (!noBackBufferBackup && !src_surface->texture_name)
4718     {
4719         /* Get it a description */
4720         surface_internal_preload(src_surface, SRGB_RGB);
4721     }
4722     ENTER_GL();
4723
4724     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4725      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4726      */
4727     if (context->aux_buffers >= 2)
4728     {
4729         /* Got more than one aux buffer? Use the 2nd aux buffer */
4730         drawBuffer = GL_AUX1;
4731     }
4732     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4733     {
4734         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4735         drawBuffer = GL_AUX0;
4736     }
4737
4738     if(noBackBufferBackup) {
4739         glGenTextures(1, &backup);
4740         checkGLcall("glGenTextures");
4741         glBindTexture(GL_TEXTURE_2D, backup);
4742         checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4743         texture_target = GL_TEXTURE_2D;
4744     } else {
4745         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4746          * we are reading from the back buffer, the backup can be used as source texture
4747          */
4748         texture_target = src_surface->texture_target;
4749         glBindTexture(texture_target, src_surface->texture_name);
4750         checkGLcall("glBindTexture(texture_target, src_surface->texture_name)");
4751         glEnable(texture_target);
4752         checkGLcall("glEnable(texture_target)");
4753
4754         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4755         src_surface->flags &= ~SFLAG_INTEXTURE;
4756     }
4757
4758     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4759      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4760      */
4761     if(dst_rect.top > dst_rect.bottom) {
4762         UINT tmp = dst_rect.bottom;
4763         dst_rect.bottom = dst_rect.top;
4764         dst_rect.top = tmp;
4765         upsidedown = TRUE;
4766     }
4767
4768     if (src_offscreen)
4769     {
4770         TRACE("Reading from an offscreen target\n");
4771         upsidedown = !upsidedown;
4772         glReadBuffer(device->offscreenBuffer);
4773     }
4774     else
4775     {
4776         glReadBuffer(surface_get_gl_buffer(src_surface));
4777     }
4778
4779     /* TODO: Only back up the part that will be overwritten */
4780     glCopyTexSubImage2D(texture_target, 0,
4781                         0, 0 /* read offsets */,
4782                         0, 0,
4783                         fbwidth,
4784                         fbheight);
4785
4786     checkGLcall("glCopyTexSubImage2D");
4787
4788     /* No issue with overriding these - the sampler is dirty due to blit usage */
4789     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4790             wined3d_gl_mag_filter(magLookup, Filter));
4791     checkGLcall("glTexParameteri");
4792     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
4793             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
4794     checkGLcall("glTexParameteri");
4795
4796     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4797         src_swapchain = src_surface->container.u.swapchain;
4798     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
4799     {
4800         src = backup ? backup : src_surface->texture_name;
4801     }
4802     else
4803     {
4804         glReadBuffer(GL_FRONT);
4805         checkGLcall("glReadBuffer(GL_FRONT)");
4806
4807         glGenTextures(1, &src);
4808         checkGLcall("glGenTextures(1, &src)");
4809         glBindTexture(GL_TEXTURE_2D, src);
4810         checkGLcall("glBindTexture(GL_TEXTURE_2D, src)");
4811
4812         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
4813          * out for power of 2 sizes
4814          */
4815         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
4816                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
4817         checkGLcall("glTexImage2D");
4818         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
4819                             0, 0 /* read offsets */,
4820                             0, 0,
4821                             fbwidth,
4822                             fbheight);
4823
4824         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
4825         checkGLcall("glTexParameteri");
4826         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
4827         checkGLcall("glTexParameteri");
4828
4829         glReadBuffer(GL_BACK);
4830         checkGLcall("glReadBuffer(GL_BACK)");
4831
4832         if(texture_target != GL_TEXTURE_2D) {
4833             glDisable(texture_target);
4834             glEnable(GL_TEXTURE_2D);
4835             texture_target = GL_TEXTURE_2D;
4836         }
4837     }
4838     checkGLcall("glEnd and previous");
4839
4840     left = src_rect->left;
4841     right = src_rect->right;
4842
4843     if (!upsidedown)
4844     {
4845         top = src_surface->resource.height - src_rect->top;
4846         bottom = src_surface->resource.height - src_rect->bottom;
4847     }
4848     else
4849     {
4850         top = src_surface->resource.height - src_rect->bottom;
4851         bottom = src_surface->resource.height - src_rect->top;
4852     }
4853
4854     if (src_surface->flags & SFLAG_NORMCOORD)
4855     {
4856         left /= src_surface->pow2Width;
4857         right /= src_surface->pow2Width;
4858         top /= src_surface->pow2Height;
4859         bottom /= src_surface->pow2Height;
4860     }
4861
4862     /* draw the source texture stretched and upside down. The correct surface is bound already */
4863     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
4864     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
4865
4866     context_set_draw_buffer(context, drawBuffer);
4867     glReadBuffer(drawBuffer);
4868
4869     glBegin(GL_QUADS);
4870         /* bottom left */
4871         glTexCoord2f(left, bottom);
4872         glVertex2i(0, 0);
4873
4874         /* top left */
4875         glTexCoord2f(left, top);
4876         glVertex2i(0, dst_rect.bottom - dst_rect.top);
4877
4878         /* top right */
4879         glTexCoord2f(right, top);
4880         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4881
4882         /* bottom right */
4883         glTexCoord2f(right, bottom);
4884         glVertex2i(dst_rect.right - dst_rect.left, 0);
4885     glEnd();
4886     checkGLcall("glEnd and previous");
4887
4888     if (texture_target != dst_surface->texture_target)
4889     {
4890         glDisable(texture_target);
4891         glEnable(dst_surface->texture_target);
4892         texture_target = dst_surface->texture_target;
4893     }
4894
4895     /* Now read the stretched and upside down image into the destination texture */
4896     glBindTexture(texture_target, dst_surface->texture_name);
4897     checkGLcall("glBindTexture");
4898     glCopyTexSubImage2D(texture_target,
4899                         0,
4900                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
4901                         0, 0, /* We blitted the image to the origin */
4902                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4903     checkGLcall("glCopyTexSubImage2D");
4904
4905     if(drawBuffer == GL_BACK) {
4906         /* Write the back buffer backup back */
4907         if(backup) {
4908             if(texture_target != GL_TEXTURE_2D) {
4909                 glDisable(texture_target);
4910                 glEnable(GL_TEXTURE_2D);
4911                 texture_target = GL_TEXTURE_2D;
4912             }
4913             glBindTexture(GL_TEXTURE_2D, backup);
4914             checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4915         }
4916         else
4917         {
4918             if (texture_target != src_surface->texture_target)
4919             {
4920                 glDisable(texture_target);
4921                 glEnable(src_surface->texture_target);
4922                 texture_target = src_surface->texture_target;
4923             }
4924             glBindTexture(src_surface->texture_target, src_surface->texture_name);
4925             checkGLcall("glBindTexture(src_surface->texture_target, src_surface->texture_name)");
4926         }
4927
4928         glBegin(GL_QUADS);
4929             /* top left */
4930             glTexCoord2f(0.0f, 0.0f);
4931             glVertex2i(0, fbheight);
4932
4933             /* bottom left */
4934             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
4935             glVertex2i(0, 0);
4936
4937             /* bottom right */
4938             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
4939                     (float)fbheight / (float)src_surface->pow2Height);
4940             glVertex2i(fbwidth, 0);
4941
4942             /* top right */
4943             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
4944             glVertex2i(fbwidth, fbheight);
4945         glEnd();
4946     }
4947     glDisable(texture_target);
4948     checkGLcall("glDisable(texture_target)");
4949
4950     /* Cleanup */
4951     if (src != src_surface->texture_name && src != backup)
4952     {
4953         glDeleteTextures(1, &src);
4954         checkGLcall("glDeleteTextures(1, &src)");
4955     }
4956     if(backup) {
4957         glDeleteTextures(1, &backup);
4958         checkGLcall("glDeleteTextures(1, &backup)");
4959     }
4960
4961     LEAVE_GL();
4962
4963     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
4964
4965     context_release(context);
4966
4967     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4968      * path is never entered
4969      */
4970     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4971 }
4972
4973 /* Front buffer coordinates are always full screen coordinates, but our GL
4974  * drawable is limited to the window's client area. The sysmem and texture
4975  * copies do have the full screen size. Note that GL has a bottom-left
4976  * origin, while D3D has a top-left origin. */
4977 void surface_translate_drawable_coords(struct wined3d_surface *surface, HWND window, RECT *rect)
4978 {
4979     UINT drawable_height;
4980
4981     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4982             && surface == surface->container.u.swapchain->front_buffer)
4983     {
4984         POINT offset = {0, 0};
4985         RECT windowsize;
4986
4987         ScreenToClient(window, &offset);
4988         OffsetRect(rect, offset.x, offset.y);
4989
4990         GetClientRect(window, &windowsize);
4991         drawable_height = windowsize.bottom - windowsize.top;
4992     }
4993     else
4994     {
4995         drawable_height = surface->resource.height;
4996     }
4997
4998     rect->top = drawable_height - rect->top;
4999     rect->bottom = drawable_height - rect->bottom;
5000 }
5001
5002 /* blit between surface locations. onscreen on different swapchains is not supported.
5003  * depth / stencil is not supported. */
5004 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
5005         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
5006         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
5007 {
5008     const struct wined3d_gl_info *gl_info;
5009     struct wined3d_context *context;
5010     RECT src_rect, dst_rect;
5011     GLenum gl_filter;
5012
5013     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
5014     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
5015             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
5016     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
5017             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
5018
5019     src_rect = *src_rect_in;
5020     dst_rect = *dst_rect_in;
5021
5022     switch (filter)
5023     {
5024         case WINED3DTEXF_LINEAR:
5025             gl_filter = GL_LINEAR;
5026             break;
5027
5028         default:
5029             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
5030         case WINED3DTEXF_NONE:
5031         case WINED3DTEXF_POINT:
5032             gl_filter = GL_NEAREST;
5033             break;
5034     }
5035
5036     if (src_location == SFLAG_INDRAWABLE && surface_is_offscreen(src_surface))
5037         src_location = SFLAG_INTEXTURE;
5038     if (dst_location == SFLAG_INDRAWABLE && surface_is_offscreen(dst_surface))
5039         dst_location = SFLAG_INTEXTURE;
5040
5041     /* Make sure the locations are up-to-date. Loading the destination
5042      * surface isn't required if the entire surface is overwritten. (And is
5043      * in fact harmful if we're being called by surface_load_location() with
5044      * the purpose of loading the destination surface.) */
5045     surface_load_location(src_surface, src_location, NULL);
5046     if (!surface_is_full_rect(dst_surface, &dst_rect))
5047         surface_load_location(dst_surface, dst_location, NULL);
5048
5049     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
5050     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
5051     else context = context_acquire(device, NULL);
5052
5053     if (!context->valid)
5054     {
5055         context_release(context);
5056         WARN("Invalid context, skipping blit.\n");
5057         return;
5058     }
5059
5060     gl_info = context->gl_info;
5061
5062     if (src_location == SFLAG_INDRAWABLE)
5063     {
5064         GLenum buffer = surface_get_gl_buffer(src_surface);
5065
5066         TRACE("Source surface %p is onscreen.\n", src_surface);
5067
5068         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
5069
5070         ENTER_GL();
5071         context_bind_fbo(context, GL_READ_FRAMEBUFFER, NULL);
5072         glReadBuffer(buffer);
5073         checkGLcall("glReadBuffer()");
5074     }
5075     else
5076     {
5077         TRACE("Source surface %p is offscreen.\n", src_surface);
5078         ENTER_GL();
5079         context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
5080         glReadBuffer(GL_COLOR_ATTACHMENT0);
5081         checkGLcall("glReadBuffer()");
5082     }
5083     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
5084     LEAVE_GL();
5085
5086     if (dst_location == SFLAG_INDRAWABLE)
5087     {
5088         GLenum buffer = surface_get_gl_buffer(dst_surface);
5089
5090         TRACE("Destination surface %p is onscreen.\n", dst_surface);
5091
5092         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5093
5094         ENTER_GL();
5095         context_bind_fbo(context, GL_DRAW_FRAMEBUFFER, NULL);
5096         context_set_draw_buffer(context, buffer);
5097     }
5098     else
5099     {
5100         TRACE("Destination surface %p is offscreen.\n", dst_surface);
5101
5102         ENTER_GL();
5103         context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
5104         context_set_draw_buffer(context, GL_COLOR_ATTACHMENT0);
5105     }
5106     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
5107
5108     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
5109     device_invalidate_state(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
5110     device_invalidate_state(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
5111     device_invalidate_state(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
5112     device_invalidate_state(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
5113
5114     glDisable(GL_SCISSOR_TEST);
5115     device_invalidate_state(device, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
5116
5117     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
5118             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
5119     checkGLcall("glBlitFramebuffer()");
5120
5121     LEAVE_GL();
5122
5123     if (wined3d_settings.strict_draw_ordering
5124             || (dst_location == SFLAG_INDRAWABLE
5125             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
5126         wglFlush();
5127
5128     context_release(context);
5129 }
5130
5131 static void surface_blt_to_drawable(struct wined3d_device *device,
5132         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5133         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5134         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5135 {
5136     struct wined3d_context *context;
5137     RECT src_rect, dst_rect;
5138
5139     src_rect = *src_rect_in;
5140     dst_rect = *dst_rect_in;
5141
5142     /* Make sure the surface is up-to-date. This should probably use
5143      * surface_load_location() and worry about the destination surface too,
5144      * unless we're overwriting it completely. */
5145     surface_internal_preload(src_surface, SRGB_RGB);
5146
5147     /* Activate the destination context, set it up for blitting */
5148     context = context_acquire(device, dst_surface);
5149     context_apply_blit_state(context, device);
5150
5151     if (!surface_is_offscreen(dst_surface))
5152         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5153
5154     device->blitter->set_shader(device->blit_priv, context->gl_info, src_surface);
5155
5156     ENTER_GL();
5157
5158     if (color_key)
5159     {
5160         glEnable(GL_ALPHA_TEST);
5161         checkGLcall("glEnable(GL_ALPHA_TEST)");
5162
5163         /* When the primary render target uses P8, the alpha component
5164          * contains the palette index. Which means that the colorkey is one of
5165          * the palette entries. In other cases pixels that should be masked
5166          * away have alpha set to 0. */
5167         if (primary_render_target_is_p8(device))
5168             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5169         else
5170             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5171         checkGLcall("glAlphaFunc");
5172     }
5173     else
5174     {
5175         glDisable(GL_ALPHA_TEST);
5176         checkGLcall("glDisable(GL_ALPHA_TEST)");
5177     }
5178
5179     draw_textured_quad(src_surface, &src_rect, &dst_rect, filter);
5180
5181     if (color_key)
5182     {
5183         glDisable(GL_ALPHA_TEST);
5184         checkGLcall("glDisable(GL_ALPHA_TEST)");
5185     }
5186
5187     LEAVE_GL();
5188
5189     /* Leave the opengl state valid for blitting */
5190     device->blitter->unset_shader(context->gl_info);
5191
5192     if (wined3d_settings.strict_draw_ordering
5193             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5194             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5195         wglFlush(); /* Flush to ensure ordering across contexts. */
5196
5197     context_release(context);
5198 }
5199
5200 /* Do not call while under the GL lock. */
5201 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5202 {
5203     struct wined3d_device *device = s->resource.device;
5204     const struct blit_shader *blitter;
5205
5206     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5207             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5208     if (!blitter)
5209     {
5210         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5211         return WINED3DERR_INVALIDCALL;
5212     }
5213
5214     return blitter->color_fill(device, s, rect, color);
5215 }
5216
5217 /* Do not call while under the GL lock. */
5218 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *DestRect,
5219         struct wined3d_surface *src_surface, const RECT *SrcRect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5220         WINED3DTEXTUREFILTERTYPE Filter)
5221 {
5222     struct wined3d_device *device = dst_surface->resource.device;
5223     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5224     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5225     RECT dst_rect, src_rect;
5226
5227     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5228             dst_surface, wine_dbgstr_rect(DestRect), src_surface, wine_dbgstr_rect(SrcRect),
5229             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5230
5231     /* Get the swapchain. One of the surfaces has to be a primary surface */
5232     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5233     {
5234         WARN("Destination is in sysmem, rejecting gl blt\n");
5235         return WINED3DERR_INVALIDCALL;
5236     }
5237
5238     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5239         dstSwapchain = dst_surface->container.u.swapchain;
5240
5241     if (src_surface)
5242     {
5243         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5244         {
5245             WARN("Src is in sysmem, rejecting gl blt\n");
5246             return WINED3DERR_INVALIDCALL;
5247         }
5248
5249         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5250             srcSwapchain = src_surface->container.u.swapchain;
5251     }
5252
5253     /* Early sort out of cases where no render target is used */
5254     if (!dstSwapchain && !srcSwapchain
5255             && src_surface != device->fb.render_targets[0]
5256             && dst_surface != device->fb.render_targets[0])
5257     {
5258         TRACE("No surface is render target, not using hardware blit.\n");
5259         return WINED3DERR_INVALIDCALL;
5260     }
5261
5262     /* No destination color keying supported */
5263     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5264     {
5265         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5266         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5267         return WINED3DERR_INVALIDCALL;
5268     }
5269
5270     surface_get_rect(dst_surface, DestRect, &dst_rect);
5271     if (src_surface) surface_get_rect(src_surface, SrcRect, &src_rect);
5272
5273     /* The only case where both surfaces on a swapchain are supported is a back buffer -> front buffer blit on the same swapchain */
5274     if (dstSwapchain && dstSwapchain == srcSwapchain && dstSwapchain->back_buffers
5275             && dst_surface == dstSwapchain->front_buffer
5276             && src_surface == dstSwapchain->back_buffers[0])
5277     {
5278         /* Half-Life does a Blt from the back buffer to the front buffer,
5279          * Full surface size, no flags... Use present instead
5280          *
5281          * This path will only be entered for d3d7 and ddraw apps, because d3d8/9 offer no way to blit TO the front buffer
5282          */
5283
5284         /* Check rects - wined3d_swapchain_present() doesn't handle them. */
5285         for (;;)
5286         {
5287             TRACE("Looking if a Present can be done...\n");
5288             /* Source Rectangle must be full surface */
5289             if (src_rect.left || src_rect.top
5290                     || src_rect.right != src_surface->resource.width
5291                     || src_rect.bottom != src_surface->resource.height)
5292             {
5293                 TRACE("No, Source rectangle doesn't match\n");
5294                 break;
5295             }
5296
5297             /* No stretching may occur */
5298             if(src_rect.right != dst_rect.right - dst_rect.left ||
5299                src_rect.bottom != dst_rect.bottom - dst_rect.top) {
5300                 TRACE("No, stretching is done\n");
5301                 break;
5302             }
5303
5304             /* Destination must be full surface or match the clipping rectangle */
5305             if (dst_surface->clipper && dst_surface->clipper->hWnd)
5306             {
5307                 RECT cliprect;
5308                 POINT pos[2];
5309                 GetClientRect(dst_surface->clipper->hWnd, &cliprect);
5310                 pos[0].x = dst_rect.left;
5311                 pos[0].y = dst_rect.top;
5312                 pos[1].x = dst_rect.right;
5313                 pos[1].y = dst_rect.bottom;
5314                 MapWindowPoints(GetDesktopWindow(), dst_surface->clipper->hWnd, pos, 2);
5315
5316                 if(pos[0].x != cliprect.left  || pos[0].y != cliprect.top   ||
5317                    pos[1].x != cliprect.right || pos[1].y != cliprect.bottom)
5318                 {
5319                     TRACE("No, dest rectangle doesn't match(clipper)\n");
5320                     TRACE("Clip rect at %s\n", wine_dbgstr_rect(&cliprect));
5321                     TRACE("Blt dest: %s\n", wine_dbgstr_rect(&dst_rect));
5322                     break;
5323                 }
5324             }
5325             else if (dst_rect.left || dst_rect.top
5326                     || dst_rect.right != dst_surface->resource.width
5327                     || dst_rect.bottom != dst_surface->resource.height)
5328             {
5329                 TRACE("No, dest rectangle doesn't match(surface size)\n");
5330                 break;
5331             }
5332
5333             TRACE("Yes\n");
5334
5335             /* These flags are unimportant for the flag check, remove them */
5336             if (!(flags & ~(WINEDDBLT_DONOTWAIT | WINEDDBLT_WAIT)))
5337             {
5338                 WINED3DSWAPEFFECT orig_swap = dstSwapchain->presentParms.SwapEffect;
5339
5340                 /* The idea behind this is that a glReadPixels and a glDrawPixels call
5341                     * take very long, while a flip is fast.
5342                     * This applies to Half-Life, which does such Blts every time it finished
5343                     * a frame, and to Prince of Persia 3D, which uses this to draw at least the main
5344                     * menu. This is also used by all apps when they do windowed rendering
5345                     *
5346                     * The problem is that flipping is not really the same as copying. After a
5347                     * Blt the front buffer is a copy of the back buffer, and the back buffer is
5348                     * untouched. Therefore it's necessary to override the swap effect
5349                     * and to set it back after the flip.
5350                     *
5351                     * Windowed Direct3D < 7 apps do the same. The D3D7 sdk demos are nice
5352                     * testcases.
5353                     */
5354
5355                 dstSwapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
5356                 dstSwapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
5357
5358                 TRACE("Full screen back buffer -> front buffer blt, performing a flip instead.\n");
5359                 wined3d_swapchain_present(dstSwapchain, NULL, NULL, dstSwapchain->win_handle, NULL, 0);
5360
5361                 dstSwapchain->presentParms.SwapEffect = orig_swap;
5362
5363                 return WINED3D_OK;
5364             }
5365             break;
5366         }
5367
5368         TRACE("Unsupported blit between buffers on the same swapchain\n");
5369         return WINED3DERR_INVALIDCALL;
5370     } else if(dstSwapchain && dstSwapchain == srcSwapchain) {
5371         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5372         return WINED3DERR_INVALIDCALL;
5373     } else if(dstSwapchain && srcSwapchain) {
5374         FIXME("Implement hardware blit between two different swapchains\n");
5375         return WINED3DERR_INVALIDCALL;
5376     }
5377     else if (dstSwapchain)
5378     {
5379         /* Handled with regular texture -> swapchain blit */
5380         if (src_surface == device->fb.render_targets[0])
5381             TRACE("Blit from active render target to a swapchain\n");
5382     }
5383     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5384     {
5385         FIXME("Implement blit from a swapchain to the active render target\n");
5386         return WINED3DERR_INVALIDCALL;
5387     }
5388
5389     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5390     {
5391         /* Blit from render target to texture */
5392         BOOL stretchx;
5393
5394         /* P8 read back is not implemented */
5395         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5396                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5397         {
5398             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5399             return WINED3DERR_INVALIDCALL;
5400         }
5401
5402         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5403         {
5404             TRACE("Color keying not supported by frame buffer to texture blit\n");
5405             return WINED3DERR_INVALIDCALL;
5406             /* Destination color key is checked above */
5407         }
5408
5409         if(dst_rect.right - dst_rect.left != src_rect.right - src_rect.left) {
5410             stretchx = TRUE;
5411         } else {
5412             stretchx = FALSE;
5413         }
5414
5415         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5416          * flip the image nor scale it.
5417          *
5418          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5419          * -> If the app wants a image width an unscaled width, copy it line per line
5420          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5421          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5422          *    back buffer. This is slower than reading line per line, thus not used for flipping
5423          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5424          *    pixel by pixel
5425          *
5426          * If EXT_framebuffer_blit is supported that can be used instead. Note that EXT_framebuffer_blit implies
5427          * FBO support, so it doesn't really make sense to try and make it work with different offscreen rendering
5428          * backends. */
5429         if (fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5430                 &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5431                 &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5432         {
5433             surface_blt_fbo(device, Filter,
5434                     src_surface, SFLAG_INDRAWABLE, &src_rect,
5435                     dst_surface, SFLAG_INDRAWABLE, &dst_rect);
5436             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5437         }
5438         else if (!stretchx || dst_rect.right - dst_rect.left > src_surface->resource.width
5439                 || dst_rect.bottom - dst_rect.top > src_surface->resource.height)
5440         {
5441             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5442             fb_copy_to_texture_direct(dst_surface, src_surface, &src_rect, &dst_rect, Filter);
5443         } else {
5444             TRACE("Using hardware stretching to flip / stretch the texture\n");
5445             fb_copy_to_texture_hwstretch(dst_surface, src_surface, &src_rect, &dst_rect, Filter);
5446         }
5447
5448         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5449         {
5450             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5451             dst_surface->resource.allocatedMemory = NULL;
5452             dst_surface->resource.heapMemory = NULL;
5453         }
5454         else
5455         {
5456             dst_surface->flags &= ~SFLAG_INSYSMEM;
5457         }
5458
5459         return WINED3D_OK;
5460     }
5461     else if (src_surface)
5462     {
5463         /* Blit from offscreen surface to render target */
5464         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5465         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5466
5467         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5468
5469         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5470                 && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5471                         &src_rect, src_surface->resource.usage, src_surface->resource.pool,
5472                         src_surface->resource.format,
5473                         &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5474                         dst_surface->resource.format))
5475         {
5476             TRACE("Using surface_blt_fbo.\n");
5477             /* The source is always a texture, but never the currently active render target, and the texture
5478              * contents are never upside down. */
5479             surface_blt_fbo(device, Filter,
5480                     src_surface, SFLAG_INDRAWABLE, &src_rect,
5481                     dst_surface, SFLAG_INDRAWABLE, &dst_rect);
5482             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5483             return WINED3D_OK;
5484         }
5485
5486         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5487                 && arbfp_blit.blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5488                         &src_rect, src_surface->resource.usage, src_surface->resource.pool,
5489                         src_surface->resource.format,
5490                         &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5491                         dst_surface->resource.format))
5492         {
5493             return arbfp_blit_surface(device, src_surface, &src_rect, dst_surface, &dst_rect,
5494                     WINED3D_BLIT_OP_COLOR_BLIT, Filter);
5495         }
5496
5497         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5498                 &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5499                 &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5500         {
5501             FIXME("Unsupported blit operation falling back to software\n");
5502             return WINED3DERR_INVALIDCALL;
5503         }
5504
5505         /* Color keying: Check if we have to do a color keyed blt,
5506          * and if not check if a color key is activated.
5507          *
5508          * Just modify the color keying parameters in the surface and restore them afterwards
5509          * The surface keeps track of the color key last used to load the opengl surface.
5510          * PreLoad will catch the change to the flags and color key and reload if necessary.
5511          */
5512         if (flags & WINEDDBLT_KEYSRC)
5513         {
5514             /* Use color key from surface */
5515         }
5516         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5517         {
5518             /* Use color key from DDBltFx */
5519             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5520             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5521         }
5522         else
5523         {
5524             /* Do not use color key */
5525             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5526         }
5527
5528         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5529                 src_surface, &src_rect, dst_surface, &dst_rect);
5530
5531         /* Restore the color key parameters */
5532         src_surface->CKeyFlags = oldCKeyFlags;
5533         src_surface->SrcBltCKey = oldBltCKey;
5534
5535         surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5536
5537         return WINED3D_OK;
5538     }
5539     else
5540     {
5541         /* Source-Less Blit to render target */
5542         if (flags & WINEDDBLT_COLORFILL)
5543         {
5544             WINED3DCOLORVALUE color;
5545
5546             TRACE("Colorfill\n");
5547
5548             /* The color as given in the Blt function is in the surface format. */
5549             if (!surface_convert_color_to_float(dst_surface, DDBltFx->u5.dwFillColor, &color))
5550                 return WINED3DERR_INVALIDCALL;
5551
5552             return surface_color_fill(dst_surface, &dst_rect, &color);
5553         }
5554     }
5555
5556     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5557     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5558     return WINED3DERR_INVALIDCALL;
5559 }
5560
5561 /* GL locking is done by the caller */
5562 static void surface_depth_blt(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
5563         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5564 {
5565     struct wined3d_device *device = surface->resource.device;
5566     GLint compare_mode = GL_NONE;
5567     struct blt_info info;
5568     GLint old_binding = 0;
5569     RECT rect;
5570
5571     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5572
5573     glDisable(GL_CULL_FACE);
5574     glDisable(GL_BLEND);
5575     glDisable(GL_ALPHA_TEST);
5576     glDisable(GL_SCISSOR_TEST);
5577     glDisable(GL_STENCIL_TEST);
5578     glEnable(GL_DEPTH_TEST);
5579     glDepthFunc(GL_ALWAYS);
5580     glDepthMask(GL_TRUE);
5581     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5582     glViewport(x, y, w, h);
5583
5584     SetRect(&rect, 0, h, w, 0);
5585     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5586     GL_EXTCALL(glActiveTextureARB(GL_TEXTURE0_ARB));
5587     glGetIntegerv(info.binding, &old_binding);
5588     glBindTexture(info.bind_target, texture);
5589     if (gl_info->supported[ARB_SHADOW])
5590     {
5591         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5592         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5593     }
5594
5595     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5596             gl_info, info.tex_type, &surface->ds_current_size);
5597
5598     glBegin(GL_TRIANGLE_STRIP);
5599     glTexCoord3fv(info.coords[0]);
5600     glVertex2f(-1.0f, -1.0f);
5601     glTexCoord3fv(info.coords[1]);
5602     glVertex2f(1.0f, -1.0f);
5603     glTexCoord3fv(info.coords[2]);
5604     glVertex2f(-1.0f, 1.0f);
5605     glTexCoord3fv(info.coords[3]);
5606     glVertex2f(1.0f, 1.0f);
5607     glEnd();
5608
5609     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5610     glBindTexture(info.bind_target, old_binding);
5611
5612     glPopAttrib();
5613
5614     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5615 }
5616
5617 void surface_modify_ds_location(struct wined3d_surface *surface,
5618         DWORD location, UINT w, UINT h)
5619 {
5620     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5621
5622     if (location & ~SFLAG_DS_LOCATIONS)
5623         FIXME("Invalid location (%#x) specified.\n", location);
5624
5625     surface->ds_current_size.cx = w;
5626     surface->ds_current_size.cy = h;
5627     surface->flags &= ~SFLAG_DS_LOCATIONS;
5628     surface->flags |= location;
5629 }
5630
5631 /* Context activation is done by the caller. */
5632 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5633 {
5634     struct wined3d_device *device = surface->resource.device;
5635     const struct wined3d_gl_info *gl_info = context->gl_info;
5636     GLsizei w, h;
5637
5638     TRACE("surface %p, new location %#x.\n", surface, location);
5639
5640     /* TODO: Make this work for modes other than FBO */
5641     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5642
5643     if (!(surface->flags & location))
5644     {
5645         w = surface->ds_current_size.cx;
5646         h = surface->ds_current_size.cy;
5647         surface->ds_current_size.cx = 0;
5648         surface->ds_current_size.cy = 0;
5649     }
5650     else
5651     {
5652         w = surface->resource.width;
5653         h = surface->resource.height;
5654     }
5655
5656     if (surface->ds_current_size.cx == surface->resource.width
5657             && surface->ds_current_size.cy == surface->resource.height)
5658     {
5659         TRACE("Location (%#x) is already up to date.\n", location);
5660         return;
5661     }
5662
5663     if (surface->current_renderbuffer)
5664     {
5665         FIXME("Not supported with fixed up depth stencil.\n");
5666         return;
5667     }
5668
5669     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5670     {
5671         /* This mostly happens when a depth / stencil is used without being
5672          * cleared first. In principle we could upload from sysmem, or
5673          * explicitly clear before first usage. For the moment there don't
5674          * appear to be a lot of applications depending on this, so a FIXME
5675          * should do. */
5676         FIXME("No up to date depth stencil location.\n");
5677         surface->flags |= location;
5678         surface->ds_current_size.cx = surface->resource.width;
5679         surface->ds_current_size.cy = surface->resource.height;
5680         return;
5681     }
5682
5683     if (location == SFLAG_DS_OFFSCREEN)
5684     {
5685         GLint old_binding = 0;
5686         GLenum bind_target;
5687
5688         /* The render target is allowed to be smaller than the depth/stencil
5689          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5690          * than the offscreen surface. Don't overwrite the offscreen surface
5691          * with undefined data. */
5692         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5693         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5694
5695         TRACE("Copying onscreen depth buffer to depth texture.\n");
5696
5697         ENTER_GL();
5698
5699         if (!device->depth_blt_texture)
5700         {
5701             glGenTextures(1, &device->depth_blt_texture);
5702         }
5703
5704         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5705          * directly on the FBO texture. That's because we need to flip. */
5706         context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
5707         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5708         {
5709             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5710             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5711         }
5712         else
5713         {
5714             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5715             bind_target = GL_TEXTURE_2D;
5716         }
5717         glBindTexture(bind_target, device->depth_blt_texture);
5718         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5719          * internal format, because the internal format might include stencil
5720          * data. In principle we should copy stencil data as well, but unless
5721          * the driver supports stencil export it's hard to do, and doesn't
5722          * seem to be needed in practice. If the hardware doesn't support
5723          * writing stencil data, the glCopyTexImage2D() call might trigger
5724          * software fallbacks. */
5725         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5726         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5727         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5728         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5729         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5730         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5731         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5732         glBindTexture(bind_target, old_binding);
5733
5734         /* Setup the destination */
5735         if (!device->depth_blt_rb)
5736         {
5737             gl_info->fbo_ops.glGenRenderbuffers(1, &device->depth_blt_rb);
5738             checkGLcall("glGenRenderbuffersEXT");
5739         }
5740         if (device->depth_blt_rb_w != w || device->depth_blt_rb_h != h)
5741         {
5742             gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, device->depth_blt_rb);
5743             checkGLcall("glBindRenderbufferEXT");
5744             gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, w, h);
5745             checkGLcall("glRenderbufferStorageEXT");
5746             device->depth_blt_rb_w = w;
5747             device->depth_blt_rb_h = h;
5748         }
5749
5750         context_bind_fbo(context, GL_FRAMEBUFFER, &context->dst_fbo);
5751         gl_info->fbo_ops.glFramebufferRenderbuffer(GL_FRAMEBUFFER,
5752                 GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, device->depth_blt_rb);
5753         checkGLcall("glFramebufferRenderbufferEXT");
5754         context_attach_depth_stencil_fbo(context, GL_FRAMEBUFFER, surface, FALSE);
5755
5756         /* Do the actual blit */
5757         surface_depth_blt(surface, gl_info, device->depth_blt_texture, 0, 0, w, h, bind_target);
5758         checkGLcall("depth_blt");
5759
5760         if (context->current_fbo) context_bind_fbo(context, GL_FRAMEBUFFER, &context->current_fbo->id);
5761         else context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
5762
5763         LEAVE_GL();
5764
5765         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5766     }
5767     else if (location == SFLAG_DS_ONSCREEN)
5768     {
5769         TRACE("Copying depth texture to onscreen depth buffer.\n");
5770
5771         ENTER_GL();
5772
5773         context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
5774         surface_depth_blt(surface, gl_info, surface->texture_name,
5775                 0, surface->pow2Height - h, w, h, surface->texture_target);
5776         checkGLcall("depth_blt");
5777
5778         if (context->current_fbo) context_bind_fbo(context, GL_FRAMEBUFFER, &context->current_fbo->id);
5779
5780         LEAVE_GL();
5781
5782         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5783     }
5784     else
5785     {
5786         ERR("Invalid location (%#x) specified.\n", location);
5787     }
5788
5789     surface->flags |= location;
5790     surface->ds_current_size.cx = surface->resource.width;
5791     surface->ds_current_size.cy = surface->resource.height;
5792 }
5793
5794 void surface_modify_location(struct wined3d_surface *surface, DWORD flag, BOOL persistent)
5795 {
5796     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5797     struct wined3d_surface *overlay;
5798
5799     TRACE("surface %p, location %s, persistent %#x.\n",
5800             surface, debug_surflocation(flag), persistent);
5801
5802     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
5803     {
5804         if (surface_is_offscreen(surface))
5805         {
5806             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
5807             if (flag & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)) flag |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
5808         }
5809         else
5810         {
5811             TRACE("Surface %p is an onscreen surface.\n", surface);
5812         }
5813     }
5814
5815     if (flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5816             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5817     {
5818         flag |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5819     }
5820
5821     if (persistent)
5822     {
5823         if (((surface->flags & SFLAG_INTEXTURE) && !(flag & SFLAG_INTEXTURE))
5824                 || ((surface->flags & SFLAG_INSRGBTEX) && !(flag & SFLAG_INSRGBTEX)))
5825         {
5826             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5827             {
5828                 TRACE("Passing to container.\n");
5829                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5830             }
5831         }
5832         surface->flags &= ~SFLAG_LOCATIONS;
5833         surface->flags |= flag;
5834
5835         /* Redraw emulated overlays, if any */
5836         if (flag & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5837         {
5838             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5839             {
5840                 overlay->surface_ops->surface_draw_overlay(overlay);
5841             }
5842         }
5843     }
5844     else
5845     {
5846         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5847         {
5848             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5849             {
5850                 TRACE("Passing to container\n");
5851                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5852             }
5853         }
5854         surface->flags &= ~flag;
5855     }
5856
5857     if (!(surface->flags & SFLAG_LOCATIONS))
5858     {
5859         ERR("Surface %p does not have any up to date location.\n", surface);
5860     }
5861 }
5862
5863 static DWORD resource_access_from_location(DWORD location)
5864 {
5865     switch (location)
5866     {
5867         case SFLAG_INSYSMEM:
5868             return WINED3D_RESOURCE_ACCESS_CPU;
5869
5870         case SFLAG_INDRAWABLE:
5871         case SFLAG_INSRGBTEX:
5872         case SFLAG_INTEXTURE:
5873             return WINED3D_RESOURCE_ACCESS_GPU;
5874
5875         default:
5876             FIXME("Unhandled location %#x.\n", location);
5877             return 0;
5878     }
5879 }
5880
5881 static void surface_load_sysmem(struct wined3d_surface *surface,
5882         const struct wined3d_gl_info *gl_info, const RECT *rect)
5883 {
5884     surface_prepare_system_memory(surface);
5885
5886     /* Download the surface to system memory. */
5887     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5888     {
5889         struct wined3d_device *device = surface->resource.device;
5890         struct wined3d_context *context = NULL;
5891
5892         if (!device->isInDraw)
5893             context = context_acquire(device, NULL);
5894
5895         surface_bind_and_dirtify(surface, gl_info, !(surface->flags & SFLAG_INTEXTURE));
5896         surface_download_data(surface, gl_info);
5897
5898         if (context)
5899             context_release(context);
5900
5901         return;
5902     }
5903
5904     /* Note: It might be faster to download into a texture first. */
5905     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5906             wined3d_surface_get_pitch(surface));
5907 }
5908
5909 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5910         const struct wined3d_gl_info *gl_info, const RECT *rect)
5911 {
5912     struct wined3d_device *device = surface->resource.device;
5913     struct wined3d_format format;
5914     CONVERT_TYPES convert;
5915     UINT byte_count;
5916     BYTE *mem;
5917
5918     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5919         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5920
5921     if (surface->flags & SFLAG_INTEXTURE)
5922     {
5923         RECT r;
5924
5925         surface_get_rect(surface, rect, &r);
5926         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5927
5928         return WINED3D_OK;
5929     }
5930
5931     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5932     {
5933         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5934          * path through sysmem. */
5935         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5936     }
5937
5938     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5939
5940     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5941      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5942      * called. */
5943     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5944     {
5945         struct wined3d_context *context = NULL;
5946
5947         TRACE("Removing the pbo attached to surface %p.\n", surface);
5948
5949         if (!device->isInDraw)
5950             context = context_acquire(device, NULL);
5951
5952         surface_remove_pbo(surface, gl_info);
5953
5954         if (context)
5955             context_release(context);
5956     }
5957
5958     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5959     {
5960         UINT height = surface->resource.height;
5961         UINT width = surface->resource.width;
5962         UINT src_pitch, dst_pitch;
5963
5964         byte_count = format.conv_byte_count;
5965         src_pitch = wined3d_surface_get_pitch(surface);
5966
5967         /* Stick to the alignment for the converted surface too, makes it
5968          * easier to load the surface. */
5969         dst_pitch = width * byte_count;
5970         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5971
5972         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5973         {
5974             ERR("Out of memory (%u).\n", dst_pitch * height);
5975             return E_OUTOFMEMORY;
5976         }
5977
5978         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5979                 src_pitch, width, height, dst_pitch, convert, surface);
5980
5981         surface->flags |= SFLAG_CONVERTED;
5982     }
5983     else
5984     {
5985         surface->flags &= ~SFLAG_CONVERTED;
5986         mem = surface->resource.allocatedMemory;
5987         byte_count = format.byte_count;
5988     }
5989
5990     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5991
5992     /* Don't delete PBO memory. */
5993     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5994         HeapFree(GetProcessHeap(), 0, mem);
5995
5996     return WINED3D_OK;
5997 }
5998
5999 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6000         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6001 {
6002     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
6003     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6004     struct wined3d_device *device = surface->resource.device;
6005     struct wined3d_context *context = NULL;
6006     UINT width, src_pitch, dst_pitch;
6007     struct wined3d_bo_address data;
6008     struct wined3d_format format;
6009     POINT dst_point = {0, 0};
6010     CONVERT_TYPES convert;
6011     BYTE *mem;
6012
6013     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6014             && surface_is_offscreen(surface)
6015             && (surface->flags & SFLAG_INDRAWABLE))
6016     {
6017         read_from_framebuffer_texture(surface, srgb);
6018
6019         return WINED3D_OK;
6020     }
6021
6022     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6023             && (surface->resource.format->flags & attach_flags) == attach_flags
6024             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6025                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6026                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6027     {
6028         if (srgb)
6029             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
6030                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6031         else
6032             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
6033                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6034
6035         return WINED3D_OK;
6036     }
6037
6038     /* Upload from system memory */
6039
6040     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6041             TRUE /* We will use textures */, &format, &convert);
6042
6043     if (srgb)
6044     {
6045         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6046         {
6047             /* Performance warning... */
6048             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6049             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6050         }
6051     }
6052     else
6053     {
6054         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6055         {
6056             /* Performance warning... */
6057             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6058             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6059         }
6060     }
6061
6062     if (!(surface->flags & SFLAG_INSYSMEM))
6063     {
6064         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6065         /* Lets hope we get it from somewhere... */
6066         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6067     }
6068
6069     if (!device->isInDraw)
6070         context = context_acquire(device, NULL);
6071
6072     surface_prepare_texture(surface, gl_info, srgb);
6073     surface_bind_and_dirtify(surface, gl_info, srgb);
6074
6075     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6076     {
6077         surface->flags |= SFLAG_GLCKEY;
6078         surface->glCKey = surface->SrcBltCKey;
6079     }
6080     else surface->flags &= ~SFLAG_GLCKEY;
6081
6082     width = surface->resource.width;
6083     src_pitch = wined3d_surface_get_pitch(surface);
6084
6085     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6086      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6087      * called. */
6088     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6089     {
6090         TRACE("Removing the pbo attached to surface %p.\n", surface);
6091         surface_remove_pbo(surface, gl_info);
6092     }
6093
6094     if (format.convert)
6095     {
6096         /* This code is entered for texture formats which need a fixup. */
6097         UINT height = surface->resource.height;
6098
6099         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6100         dst_pitch = width * format.conv_byte_count;
6101         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6102
6103         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6104         {
6105             ERR("Out of memory (%u).\n", dst_pitch * height);
6106             if (context)
6107                 context_release(context);
6108             return E_OUTOFMEMORY;
6109         }
6110         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6111     }
6112     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6113     {
6114         /* This code is only entered for color keying fixups */
6115         UINT height = surface->resource.height;
6116
6117         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6118         dst_pitch = width * format.conv_byte_count;
6119         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6120
6121         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6122         {
6123             ERR("Out of memory (%u).\n", dst_pitch * height);
6124             if (context)
6125                 context_release(context);
6126             return E_OUTOFMEMORY;
6127         }
6128         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6129                 width, height, dst_pitch, convert, surface);
6130     }
6131     else
6132     {
6133         mem = surface->resource.allocatedMemory;
6134     }
6135
6136     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6137     data.addr = mem;
6138     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6139
6140     if (context)
6141         context_release(context);
6142
6143     /* Don't delete PBO memory. */
6144     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6145         HeapFree(GetProcessHeap(), 0, mem);
6146
6147     return WINED3D_OK;
6148 }
6149
6150 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD flag, const RECT *rect)
6151 {
6152     struct wined3d_device *device = surface->resource.device;
6153     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6154     BOOL in_fbo = FALSE;
6155     HRESULT hr;
6156
6157     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(flag), wine_dbgstr_rect(rect));
6158
6159     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6160     {
6161         if (flag == SFLAG_INTEXTURE)
6162         {
6163             struct wined3d_context *context = context_acquire(device, NULL);
6164             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6165             context_release(context);
6166             return WINED3D_OK;
6167         }
6168         else
6169         {
6170             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(flag));
6171             return WINED3DERR_INVALIDCALL;
6172         }
6173     }
6174
6175     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6176     {
6177         if (surface_is_offscreen(surface))
6178         {
6179             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets.
6180              * Prefer SFLAG_INTEXTURE. */
6181             if (flag == SFLAG_INDRAWABLE) flag = SFLAG_INTEXTURE;
6182             in_fbo = TRUE;
6183         }
6184         else
6185         {
6186             TRACE("Surface %p is an onscreen surface.\n", surface);
6187         }
6188     }
6189
6190     if (flag == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6191     {
6192         flag = SFLAG_INTEXTURE;
6193     }
6194
6195     if (surface->flags & flag)
6196     {
6197         TRACE("Location already up to date\n");
6198         return WINED3D_OK;
6199     }
6200
6201     if (WARN_ON(d3d_surface))
6202     {
6203         DWORD required_access = resource_access_from_location(flag);
6204         if ((surface->resource.access_flags & required_access) != required_access)
6205             WARN("Operation requires %#x access, but surface only has %#x.\n",
6206                     required_access, surface->resource.access_flags);
6207     }
6208
6209     if (!(surface->flags & SFLAG_LOCATIONS))
6210     {
6211         ERR("Surface %p does not have any up to date location.\n", surface);
6212         surface->flags |= SFLAG_LOST;
6213         return WINED3DERR_DEVICELOST;
6214     }
6215
6216     switch (flag)
6217     {
6218         case SFLAG_INSYSMEM:
6219             surface_load_sysmem(surface, gl_info, rect);
6220             break;
6221
6222         case SFLAG_INDRAWABLE:
6223             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6224                 return hr;
6225             break;
6226
6227         case SFLAG_INTEXTURE:
6228         case SFLAG_INSRGBTEX:
6229             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, flag == SFLAG_INSRGBTEX)))
6230                 return hr;
6231             break;
6232
6233         default:
6234             ERR("Don't know how to handle location %#x.\n", flag);
6235             break;
6236     }
6237
6238     if (!rect)
6239     {
6240         surface->flags |= flag;
6241
6242         if (flag != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6243             surface_evict_sysmem(surface);
6244     }
6245
6246     if (in_fbo && (surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)))
6247     {
6248         /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
6249         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
6250     }
6251
6252     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6253             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6254     {
6255         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6256     }
6257
6258     return WINED3D_OK;
6259 }
6260
6261 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6262 {
6263     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6264
6265     /* Not on a swapchain - must be offscreen */
6266     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6267
6268     /* The front buffer is always onscreen */
6269     if (surface == swapchain->front_buffer) return FALSE;
6270
6271     /* If the swapchain is rendered to an FBO, the backbuffer is
6272      * offscreen, otherwise onscreen */
6273     return swapchain->render_to_fbo;
6274 }
6275
6276 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Release hook of the fixed-function blitter; intentionally a no-op since
 * ffp_blit_alloc() does not allocate anything.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6279
6280 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6281 /* Context activation is done by the caller. */
6282 static void ffp_blit_p8_upload_palette(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6283 {
6284     BYTE table[256][4];
6285     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6286
6287     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6288
6289     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6290     ENTER_GL();
6291     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6292     LEAVE_GL();
6293 }
6294
6295 /* Context activation is done by the caller. */
6296 static HRESULT ffp_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6297 {
6298     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6299
6300     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6301      * else the surface is converted in software at upload time in LoadLocation.
6302      */
6303     if(fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6304         ffp_blit_p8_upload_palette(surface, gl_info);
6305
6306     ENTER_GL();
6307     glEnable(surface->texture_target);
6308     checkGLcall("glEnable(surface->texture_target)");
6309     LEAVE_GL();
6310     return WINED3D_OK;
6311 }
6312
6313 /* Context activation is done by the caller. */
6314 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6315 {
6316     ENTER_GL();
6317     glDisable(GL_TEXTURE_2D);
6318     checkGLcall("glDisable(GL_TEXTURE_2D)");
6319     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6320     {
6321         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6322         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6323     }
6324     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6325     {
6326         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6327         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6328     }
6329     LEAVE_GL();
6330 }
6331
6332 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6333         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6334         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6335 {
6336     enum complex_fixup src_fixup;
6337
6338     switch (blit_op)
6339     {
6340         case WINED3D_BLIT_OP_COLOR_BLIT:
6341             src_fixup = get_complex_fixup(src_format->color_fixup);
6342             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6343             {
6344                 TRACE("Checking support for fixup:\n");
6345                 dump_color_fixup_desc(src_format->color_fixup);
6346             }
6347
6348             if (!is_identity_fixup(dst_format->color_fixup))
6349             {
6350                 TRACE("Destination fixups are not supported\n");
6351                 return FALSE;
6352             }
6353
6354             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6355             {
6356                 TRACE("P8 fixup supported\n");
6357                 return TRUE;
6358             }
6359
6360             /* We only support identity conversions. */
6361             if (is_identity_fixup(src_format->color_fixup))
6362             {
6363                 TRACE("[OK]\n");
6364                 return TRUE;
6365             }
6366
6367             TRACE("[FAILED]\n");
6368             return FALSE;
6369
6370         case WINED3D_BLIT_OP_COLOR_FILL:
6371             if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6372             {
6373                 TRACE("Color fill not supported\n");
6374                 return FALSE;
6375             }
6376
6377             return TRUE;
6378
6379         case WINED3D_BLIT_OP_DEPTH_FILL:
6380             return TRUE;
6381
6382         default:
6383             TRACE("Unsupported blit_op=%d\n", blit_op);
6384             return FALSE;
6385     }
6386 }
6387
6388 /* Do not call while under the GL lock. */
6389 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6390         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6391 {
6392     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6393
6394     return device_clear_render_targets(device, 1, &dst_surface, NULL,
6395             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6396 }
6397
6398 /* Do not call while under the GL lock. */
6399 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6400         struct wined3d_surface *surface, const RECT *rect, float depth)
6401 {
6402     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6403
6404     return device_clear_render_targets(device, 0, NULL, surface,
6405             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6406 }
6407
6408 const struct blit_shader ffp_blit =  {
6409     ffp_blit_alloc,
6410     ffp_blit_free,
6411     ffp_blit_set,
6412     ffp_blit_unset,
6413     ffp_blit_supported,
6414     ffp_blit_color_fill,
6415     ffp_blit_depth_fill,
6416 };
6417
6418 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6419 {
6420     return WINED3D_OK;
6421 }
6422
/* Release hook of the CPU blitter; a deliberate no-op because
 * cpu_blit_alloc() allocates nothing.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Nothing to release. */
}
6427
6428 /* Context activation is done by the caller. */
6429 static HRESULT cpu_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6430 {
6431     return WINED3D_OK;
6432 }
6433
/* State teardown hook of the CPU blitter; a deliberate no-op since
 * cpu_blit_set() changes nothing.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* No state to restore. */
}
6438
6439 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6440         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6441         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6442 {
6443     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6444     {
6445         return TRUE;
6446     }
6447
6448     return FALSE;
6449 }
6450
6451 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6452         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6453         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6454 {
6455     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6456     const struct wined3d_format *src_format, *dst_format;
6457     struct wined3d_surface *orig_src = src_surface;
6458     WINED3DLOCKED_RECT dlock, slock;
6459     HRESULT hr = WINED3D_OK;
6460     const BYTE *sbuf;
6461     RECT xdst,xsrc;
6462     BYTE *dbuf;
6463     int x, y;
6464
6465     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6466             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6467             flags, fx, debug_d3dtexturefiltertype(filter));
6468
6469     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
6470     {
6471         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY\n");
6472         return WINEDDERR_SURFACEBUSY;
6473     }
6474
6475     /* First check for the validity of source / destination rectangles.
6476      * This was verified using a test application and by MSDN. */
6477     if (src_rect)
6478     {
6479         if (src_surface)
6480         {
6481             if (src_rect->right < src_rect->left || src_rect->bottom < src_rect->top
6482                     || src_rect->left > src_surface->resource.width || src_rect->left < 0
6483                     || src_rect->top > src_surface->resource.height || src_rect->top < 0
6484                     || src_rect->right > src_surface->resource.width || src_rect->right < 0
6485                     || src_rect->bottom > src_surface->resource.height || src_rect->bottom < 0)
6486             {
6487                 WARN("Application gave us bad source rectangle for Blt.\n");
6488                 return WINEDDERR_INVALIDRECT;
6489             }
6490
6491             if (!src_rect->right || !src_rect->bottom
6492                     || src_rect->left == (int)src_surface->resource.width
6493                     || src_rect->top == (int)src_surface->resource.height)
6494             {
6495                 TRACE("Nothing to be done.\n");
6496                 return WINED3D_OK;
6497             }
6498         }
6499
6500         xsrc = *src_rect;
6501     }
6502     else if (src_surface)
6503     {
6504         xsrc.left = 0;
6505         xsrc.top = 0;
6506         xsrc.right = src_surface->resource.width;
6507         xsrc.bottom = src_surface->resource.height;
6508     }
6509     else
6510     {
6511         memset(&xsrc, 0, sizeof(xsrc));
6512     }
6513
6514     if (dst_rect)
6515     {
6516         /* For the Destination rect, it can be out of bounds on the condition
6517          * that a clipper is set for the given surface. */
6518         if (!dst_surface->clipper && (dst_rect->right < dst_rect->left || dst_rect->bottom < dst_rect->top
6519                 || dst_rect->left > dst_surface->resource.width || dst_rect->left < 0
6520                 || dst_rect->top > dst_surface->resource.height || dst_rect->top < 0
6521                 || dst_rect->right > dst_surface->resource.width || dst_rect->right < 0
6522                 || dst_rect->bottom > dst_surface->resource.height || dst_rect->bottom < 0))
6523         {
6524             WARN("Application gave us bad destination rectangle for Blt without a clipper set.\n");
6525             return WINEDDERR_INVALIDRECT;
6526         }
6527
6528         if (dst_rect->right <= 0 || dst_rect->bottom <= 0
6529                 || dst_rect->left >= (int)dst_surface->resource.width
6530                 || dst_rect->top >= (int)dst_surface->resource.height)
6531         {
6532             TRACE("Nothing to be done.\n");
6533             return WINED3D_OK;
6534         }
6535
6536         if (!src_surface)
6537         {
6538             RECT full_rect;
6539
6540             full_rect.left = 0;
6541             full_rect.top = 0;
6542             full_rect.right = dst_surface->resource.width;
6543             full_rect.bottom = dst_surface->resource.height;
6544             IntersectRect(&xdst, &full_rect, dst_rect);
6545         }
6546         else
6547         {
6548             BOOL clip_horiz, clip_vert;
6549
6550             xdst = *dst_rect;
6551             clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6552             clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6553
6554             if (clip_vert || clip_horiz)
6555             {
6556                 /* Now check if this is a special case or not... */
6557                 if ((flags & WINEDDBLT_DDFX)
6558                         || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6559                         || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6560                 {
6561                     WARN("Out of screen rectangle in special case. Not handled right now.\n");
6562                     return WINED3D_OK;
6563                 }
6564
6565                 if (clip_horiz)
6566                 {
6567                     if (xdst.left < 0)
6568                     {
6569                         xsrc.left -= xdst.left;
6570                         xdst.left = 0;
6571                     }
6572                     if (xdst.right > dst_surface->resource.width)
6573                     {
6574                         xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6575                         xdst.right = (int)dst_surface->resource.width;
6576                     }
6577                 }
6578
6579                 if (clip_vert)
6580                 {
6581                     if (xdst.top < 0)
6582                     {
6583                         xsrc.top -= xdst.top;
6584                         xdst.top = 0;
6585                     }
6586                     if (xdst.bottom > dst_surface->resource.height)
6587                     {
6588                         xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6589                         xdst.bottom = (int)dst_surface->resource.height;
6590                     }
6591                 }
6592
6593                 /* And check if after clipping something is still to be done... */
6594                 if ((xdst.right <= 0) || (xdst.bottom <= 0)
6595                         || (xdst.left >= (int)dst_surface->resource.width)
6596                         || (xdst.top >= (int)dst_surface->resource.height)
6597                         || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6598                         || (xsrc.left >= (int)src_surface->resource.width)
6599                         || (xsrc.top >= (int)src_surface->resource.height))
6600                 {
6601                     TRACE("Nothing to be done after clipping.\n");
6602                     return WINED3D_OK;
6603                 }
6604             }
6605         }
6606     }
6607     else
6608     {
6609         xdst.left = 0;
6610         xdst.top = 0;
6611         xdst.right = dst_surface->resource.width;
6612         xdst.bottom = dst_surface->resource.height;
6613     }
6614
6615     if (src_surface == dst_surface)
6616     {
6617         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6618         slock = dlock;
6619         src_format = dst_surface->resource.format;
6620         dst_format = src_format;
6621     }
6622     else
6623     {
6624         dst_format = dst_surface->resource.format;
6625         if (src_surface)
6626         {
6627             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6628             {
6629                 src_surface = surface_convert_format(src_surface, dst_format->id);
6630                 if (!src_surface)
6631                 {
6632                     /* The conv function writes a FIXME */
6633                     WARN("Cannot convert source surface format to dest format.\n");
6634                     goto release;
6635                 }
6636             }
6637             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6638             src_format = src_surface->resource.format;
6639         }
6640         else
6641         {
6642             src_format = dst_format;
6643         }
6644         if (dst_rect)
6645             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6646         else
6647             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6648     }
6649
6650     if (!fx || !(fx->dwDDFX)) flags &= ~WINEDDBLT_DDFX;
6651
6652     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_FOURCC)
6653     {
6654         if (!dst_rect || src_surface == dst_surface)
6655         {
6656             memcpy(dlock.pBits, slock.pBits, dst_surface->resource.size);
6657             goto release;
6658         }
6659     }
6660
6661     bpp = dst_surface->resource.format->byte_count;
6662     srcheight = xsrc.bottom - xsrc.top;
6663     srcwidth = xsrc.right - xsrc.left;
6664     dstheight = xdst.bottom - xdst.top;
6665     dstwidth = xdst.right - xdst.left;
6666     width = (xdst.right - xdst.left) * bpp;
6667
6668     if (dst_rect && src_surface != dst_surface)
6669         dbuf = dlock.pBits;
6670     else
6671         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6672
6673     if (flags & WINEDDBLT_WAIT)
6674     {
6675         flags &= ~WINEDDBLT_WAIT;
6676     }
6677     if (flags & WINEDDBLT_ASYNC)
6678     {
6679         static BOOL displayed = FALSE;
6680         if (!displayed)
6681             FIXME("Can't handle WINEDDBLT_ASYNC flag right now.\n");
6682         displayed = TRUE;
6683         flags &= ~WINEDDBLT_ASYNC;
6684     }
6685     if (flags & WINEDDBLT_DONOTWAIT)
6686     {
6687         /* WINEDDBLT_DONOTWAIT appeared in DX7 */
6688         static BOOL displayed = FALSE;
6689         if (!displayed)
6690             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag right now.\n");
6691         displayed = TRUE;
6692         flags &= ~WINEDDBLT_DONOTWAIT;
6693     }
6694
6695     /* First, all the 'source-less' blits */
6696     if (flags & WINEDDBLT_COLORFILL)
6697     {
6698         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6699         flags &= ~WINEDDBLT_COLORFILL;
6700     }
6701
6702     if (flags & WINEDDBLT_DEPTHFILL)
6703     {
6704         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6705     }
6706     if (flags & WINEDDBLT_ROP)
6707     {
6708         /* Catch some degenerate cases here. */
6709         switch (fx->dwROP)
6710         {
6711             case BLACKNESS:
6712                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6713                 break;
6714             case 0xAA0029: /* No-op */
6715                 break;
6716             case WHITENESS:
6717                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6718                 break;
6719             case SRCCOPY: /* Well, we do that below? */
6720                 break;
6721             default:
6722                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6723                 goto error;
6724         }
6725         flags &= ~WINEDDBLT_ROP;
6726     }
6727     if (flags & WINEDDBLT_DDROPS)
6728     {
6729         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6730     }
6731     /* Now the 'with source' blits. */
6732     if (src_surface)
6733     {
6734         const BYTE *sbase;
6735         int sx, xinc, sy, yinc;
6736
6737         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6738             goto release;
6739
6740         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6741                 && (srcwidth != dstwidth || srcheight != dstheight))
6742         {
6743             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6744             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6745         }
6746
6747         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6748         xinc = (srcwidth << 16) / dstwidth;
6749         yinc = (srcheight << 16) / dstheight;
6750
6751         if (!flags)
6752         {
6753             /* No effects, we can cheat here. */
6754             if (dstwidth == srcwidth)
6755             {
6756                 if (dstheight == srcheight)
6757                 {
6758                     /* No stretching in either direction. This needs to be as
6759                      * fast as possible. */
6760                     sbuf = sbase;
6761
6762                     /* Check for overlapping surfaces. */
6763                     if (src_surface != dst_surface || xdst.top < xsrc.top
6764                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6765                     {
6766                         /* No overlap, or dst above src, so copy from top downwards. */
6767                         for (y = 0; y < dstheight; ++y)
6768                         {
6769                             memcpy(dbuf, sbuf, width);
6770                             sbuf += slock.Pitch;
6771                             dbuf += dlock.Pitch;
6772                         }
6773                     }
6774                     else if (xdst.top > xsrc.top)
6775                     {
6776                         /* Copy from bottom upwards. */
6777                         sbuf += (slock.Pitch*dstheight);
6778                         dbuf += (dlock.Pitch*dstheight);
6779                         for (y = 0; y < dstheight; ++y)
6780                         {
6781                             sbuf -= slock.Pitch;
6782                             dbuf -= dlock.Pitch;
6783                             memcpy(dbuf, sbuf, width);
6784                         }
6785                     }
6786                     else
6787                     {
6788                         /* Src and dst overlapping on the same line, use memmove. */
6789                         for (y = 0; y < dstheight; ++y)
6790                         {
6791                             memmove(dbuf, sbuf, width);
6792                             sbuf += slock.Pitch;
6793                             dbuf += dlock.Pitch;
6794                         }
6795                     }
6796                 }
6797                 else
6798                 {
6799                     /* Stretching in y direction only. */
6800                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6801                     {
6802                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6803                         memcpy(dbuf, sbuf, width);
6804                         dbuf += dlock.Pitch;
6805                     }
6806                 }
6807             }
6808             else
6809             {
6810                 /* Stretching in X direction. */
6811                 int last_sy = -1;
6812                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6813                 {
6814                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6815
6816                     if ((sy >> 16) == (last_sy >> 16))
6817                     {
6818                         /* This source row is the same as last source row -
6819                          * Copy the already stretched row. */
6820                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6821                     }
6822                     else
6823                     {
6824 #define STRETCH_ROW(type) \
6825 do { \
6826     const type *s = (const type *)sbuf; \
6827     type *d = (type *)dbuf; \
6828     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6829         d[x] = s[sx >> 16]; \
6830 } while(0)
6831
6832                         switch(bpp)
6833                         {
6834                             case 1:
6835                                 STRETCH_ROW(BYTE);
6836                                 break;
6837                             case 2:
6838                                 STRETCH_ROW(WORD);
6839                                 break;
6840                             case 4:
6841                                 STRETCH_ROW(DWORD);
6842                                 break;
6843                             case 3:
6844                             {
6845                                 const BYTE *s;
6846                                 BYTE *d = dbuf;
6847                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6848                                 {
6849                                     DWORD pixel;
6850
6851                                     s = sbuf + 3 * (sx >> 16);
6852                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6853                                     d[0] = (pixel      ) & 0xff;
6854                                     d[1] = (pixel >>  8) & 0xff;
6855                                     d[2] = (pixel >> 16) & 0xff;
6856                                     d += 3;
6857                                 }
6858                                 break;
6859                             }
6860                             default:
6861                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6862                                 hr = WINED3DERR_NOTAVAILABLE;
6863                                 goto error;
6864                         }
6865 #undef STRETCH_ROW
6866                     }
6867                     dbuf += dlock.Pitch;
6868                     last_sy = sy;
6869                 }
6870             }
6871         }
6872         else
6873         {
6874             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6875             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6876             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6877             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6878             {
6879                 /* The color keying flags are checked for correctness in ddraw */
6880                 if (flags & WINEDDBLT_KEYSRC)
6881                 {
6882                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6883                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6884                 }
6885                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6886                 {
6887                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6888                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6889                 }
6890
6891                 if (flags & WINEDDBLT_KEYDEST)
6892                 {
6893                     /* Destination color keys are taken from the source surface! */
6894                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6895                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6896                 }
6897                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6898                 {
6899                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6900                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6901                 }
6902
6903                 if (bpp == 1)
6904                 {
6905                     keymask = 0xff;
6906                 }
6907                 else
6908                 {
6909                     keymask = src_format->red_mask
6910                             | src_format->green_mask
6911                             | src_format->blue_mask;
6912                 }
6913                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6914             }
6915
6916             if (flags & WINEDDBLT_DDFX)
6917             {
6918                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6919                 LONG tmpxy;
6920                 dTopLeft     = dbuf;
6921                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6922                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6923                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6924
6925                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6926                 {
6927                     /* I don't think we need to do anything about this flag */
6928                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6929                 }
6930                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6931                 {
6932                     tmp          = dTopRight;
6933                     dTopRight    = dTopLeft;
6934                     dTopLeft     = tmp;
6935                     tmp          = dBottomRight;
6936                     dBottomRight = dBottomLeft;
6937                     dBottomLeft  = tmp;
6938                     dstxinc = dstxinc * -1;
6939                 }
6940                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6941                 {
6942                     tmp          = dTopLeft;
6943                     dTopLeft     = dBottomLeft;
6944                     dBottomLeft  = tmp;
6945                     tmp          = dTopRight;
6946                     dTopRight    = dBottomRight;
6947                     dBottomRight = tmp;
6948                     dstyinc = dstyinc * -1;
6949                 }
6950                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6951                 {
6952                     /* I don't think we need to do anything about this flag */
6953                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6954                 }
6955                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6956                 {
6957                     tmp          = dBottomRight;
6958                     dBottomRight = dTopLeft;
6959                     dTopLeft     = tmp;
6960                     tmp          = dBottomLeft;
6961                     dBottomLeft  = dTopRight;
6962                     dTopRight    = tmp;
6963                     dstxinc = dstxinc * -1;
6964                     dstyinc = dstyinc * -1;
6965                 }
6966                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6967                 {
6968                     tmp          = dTopLeft;
6969                     dTopLeft     = dBottomLeft;
6970                     dBottomLeft  = dBottomRight;
6971                     dBottomRight = dTopRight;
6972                     dTopRight    = tmp;
6973                     tmpxy   = dstxinc;
6974                     dstxinc = dstyinc;
6975                     dstyinc = tmpxy;
6976                     dstxinc = dstxinc * -1;
6977                 }
6978                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6979                 {
6980                     tmp          = dTopLeft;
6981                     dTopLeft     = dTopRight;
6982                     dTopRight    = dBottomRight;
6983                     dBottomRight = dBottomLeft;
6984                     dBottomLeft  = tmp;
6985                     tmpxy   = dstxinc;
6986                     dstxinc = dstyinc;
6987                     dstyinc = tmpxy;
6988                     dstyinc = dstyinc * -1;
6989                 }
6990                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6991                 {
6992                     /* I don't think we need to do anything about this flag */
6993                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6994                 }
6995                 dbuf = dTopLeft;
6996                 flags &= ~(WINEDDBLT_DDFX);
6997             }
6998
6999 #define COPY_COLORKEY_FX(type) \
7000 do { \
7001     const type *s; \
7002     type *d = (type *)dbuf, *dx, tmp; \
7003     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7004     { \
7005         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
7006         dx = d; \
7007         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7008         { \
7009             tmp = s[sx >> 16]; \
7010             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7011                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7012             { \
7013                 dx[0] = tmp; \
7014             } \
7015             dx = (type *)(((BYTE *)dx) + dstxinc); \
7016         } \
7017         d = (type *)(((BYTE *)d) + dstyinc); \
7018     } \
7019 } while(0)
7020
7021             switch (bpp)
7022             {
7023                 case 1:
7024                     COPY_COLORKEY_FX(BYTE);
7025                     break;
7026                 case 2:
7027                     COPY_COLORKEY_FX(WORD);
7028                     break;
7029                 case 4:
7030                     COPY_COLORKEY_FX(DWORD);
7031                     break;
7032                 case 3:
7033                 {
7034                     const BYTE *s;
7035                     BYTE *d = dbuf, *dx;
7036                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7037                     {
7038                         sbuf = sbase + (sy >> 16) * slock.Pitch;
7039                         dx = d;
7040                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7041                         {
7042                             DWORD pixel, dpixel = 0;
7043                             s = sbuf + 3 * (sx>>16);
7044                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7045                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7046                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7047                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7048                             {
7049                                 dx[0] = (pixel      ) & 0xff;
7050                                 dx[1] = (pixel >>  8) & 0xff;
7051                                 dx[2] = (pixel >> 16) & 0xff;
7052                             }
7053                             dx += dstxinc;
7054                         }
7055                         d += dstyinc;
7056                     }
7057                     break;
7058                 }
7059                 default:
7060                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7061                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7062                     hr = WINED3DERR_NOTAVAILABLE;
7063                     goto error;
7064 #undef COPY_COLORKEY_FX
7065             }
7066         }
7067     }
7068
7069 error:
7070     if (flags && FIXME_ON(d3d_surface))
7071     {
7072         FIXME("\tUnsupported flags: %#x.\n", flags);
7073     }
7074
7075 release:
7076     wined3d_surface_unmap(dst_surface);
7077     if (src_surface && src_surface != dst_surface)
7078         wined3d_surface_unmap(src_surface);
7079     /* Release the converted surface, if any. */
7080     if (src_surface && src_surface != orig_src)
7081         wined3d_surface_decref(src_surface);
7082
7083     return hr;
7084 }
7085
7086 static HRESULT surface_cpu_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
7087         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
7088 {
7089     const struct wined3d_format *src_format, *dst_format;
7090     RECT lock_src, lock_dst, lock_union;
7091     WINED3DLOCKED_RECT dlock, slock;
7092     HRESULT hr = WINED3D_OK;
7093     int bpp, w, h, x, y;
7094     const BYTE *sbuf;
7095     BYTE *dbuf;
7096     RECT rsrc2;
7097
7098     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
7099             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
7100
7101     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface->flags & SFLAG_LOCKED))
7102     {
7103         WARN(" Surface is busy, returning DDERR_SURFACEBUSY\n");
7104         return WINEDDERR_SURFACEBUSY;
7105     }
7106
7107     if (!src_rect)
7108     {
7109         WARN("src_rect is NULL!\n");
7110         rsrc2.left = 0;
7111         rsrc2.top = 0;
7112         rsrc2.right = src_surface->resource.width;
7113         rsrc2.bottom = src_surface->resource.height;
7114         src_rect = &rsrc2;
7115     }
7116
7117     /* Check source rect for validity. Copied from normal Blt. Fixes Baldur's Gate. */
7118     if ((src_rect->bottom > src_surface->resource.height) || (src_rect->bottom < 0)
7119             || (src_rect->top > src_surface->resource.height) || (src_rect->top < 0)
7120             || (src_rect->left > src_surface->resource.width) || (src_rect->left < 0)
7121             || (src_rect->right > src_surface->resource.width) || (src_rect->right < 0)
7122             || (src_rect->right < src_rect->left) || (src_rect->bottom < src_rect->top))
7123     {
7124         WARN("Application gave us bad source rectangle for BltFast.\n");
7125         return WINEDDERR_INVALIDRECT;
7126     }
7127
7128     h = src_rect->bottom - src_rect->top;
7129     if (h > dst_surface->resource.height - dst_y)
7130         h = dst_surface->resource.height - dst_y;
7131     if (h > src_surface->resource.height - src_rect->top)
7132         h = src_surface->resource.height - src_rect->top;
7133     if (h <= 0)
7134         return WINEDDERR_INVALIDRECT;
7135
7136     w = src_rect->right - src_rect->left;
7137     if (w > dst_surface->resource.width - dst_x)
7138         w = dst_surface->resource.width - dst_x;
7139     if (w > src_surface->resource.width - src_rect->left)
7140         w = src_surface->resource.width - src_rect->left;
7141     if (w <= 0)
7142         return WINEDDERR_INVALIDRECT;
7143
7144     /* Now compute the locking rectangle... */
7145     lock_src.left = src_rect->left;
7146     lock_src.top = src_rect->top;
7147     lock_src.right = lock_src.left + w;
7148     lock_src.bottom = lock_src.top + h;
7149
7150     lock_dst.left = dst_x;
7151     lock_dst.top = dst_y;
7152     lock_dst.right = dst_x + w;
7153     lock_dst.bottom = dst_y + h;
7154
7155     bpp = dst_surface->resource.format->byte_count;
7156
7157     /* We need to lock the surfaces, or we won't get refreshes when done. */
7158     if (src_surface == dst_surface)
7159     {
7160         int pitch;
7161
7162         UnionRect(&lock_union, &lock_src, &lock_dst);
7163
7164         /* Lock the union of the two rectangles. */
7165         hr = wined3d_surface_map(dst_surface, &dlock, &lock_union, 0);
7166         if (FAILED(hr))
7167             goto error;
7168
7169         pitch = dlock.Pitch;
7170         slock.Pitch = dlock.Pitch;
7171
7172         /* Since slock was originally copied from this surface's description, we can just reuse it. */
7173         sbuf = dst_surface->resource.allocatedMemory + lock_src.top * pitch + lock_src.left * bpp;
7174         dbuf = dst_surface->resource.allocatedMemory + lock_dst.top * pitch + lock_dst.left * bpp;
7175         src_format = src_surface->resource.format;
7176         dst_format = src_format;
7177     }
7178     else
7179     {
7180         hr = wined3d_surface_map(src_surface, &slock, &lock_src, WINED3DLOCK_READONLY);
7181         if (FAILED(hr))
7182             goto error;
7183         hr = wined3d_surface_map(dst_surface, &dlock, &lock_dst, 0);
7184         if (FAILED(hr))
7185             goto error;
7186
7187         sbuf = slock.pBits;
7188         dbuf = dlock.pBits;
7189         TRACE("Dst is at %p, Src is at %p.\n", dbuf, sbuf);
7190
7191         src_format = src_surface->resource.format;
7192         dst_format = dst_surface->resource.format;
7193     }
7194
7195     /* Handle compressed surfaces first... */
7196     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
7197     {
7198         UINT row_block_count;
7199
7200         TRACE("compressed -> compressed copy\n");
7201         if (trans)
7202             FIXME("trans arg not supported when a compressed surface is involved\n");
7203         if (dst_x || dst_y)
7204             FIXME("offset for destination surface is not supported\n");
7205         if (src_surface->resource.format->id != dst_surface->resource.format->id)
7206         {
7207             FIXME("compressed -> compressed copy only supported for the same type of surface\n");
7208             hr = WINED3DERR_WRONGTEXTUREFORMAT;
7209             goto error;
7210         }
7211
7212         row_block_count = (w + dst_format->block_width - 1) / dst_format->block_width;
7213         for (y = 0; y < h; y += dst_format->block_height)
7214         {
7215             memcpy(dbuf, sbuf, row_block_count * dst_format->block_byte_count);
7216             dbuf += dlock.Pitch;
7217             sbuf += slock.Pitch;
7218         }
7219
7220         goto error;
7221     }
7222     if ((src_format->flags & WINED3DFMT_FLAG_COMPRESSED) && !(dst_format->flags & WINED3DFMT_FLAG_COMPRESSED))
7223     {
7224         /* TODO: Use the libtxc_dxtn.so shared library to do software
7225          * decompression. */
7226         ERR("Software decompression not supported.\n");
7227         goto error;
7228     }
7229
7230     if (trans & (WINEDDBLTFAST_SRCCOLORKEY | WINEDDBLTFAST_DESTCOLORKEY))
7231     {
7232         DWORD keylow, keyhigh;
7233         DWORD mask = src_surface->resource.format->red_mask
7234                 | src_surface->resource.format->green_mask
7235                 | src_surface->resource.format->blue_mask;
7236
7237         /* For some 8-bit formats like L8 and P8 color masks don't make sense */
7238         if (!mask && bpp == 1)
7239             mask = 0xff;
7240
7241         TRACE("Color keyed copy.\n");
7242         if (trans & WINEDDBLTFAST_SRCCOLORKEY)
7243         {
7244             keylow = src_surface->SrcBltCKey.dwColorSpaceLowValue;
7245             keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
7246         }
7247         else
7248         {
7249             /* I'm not sure if this is correct. */
7250             FIXME("WINEDDBLTFAST_DESTCOLORKEY not fully supported yet.\n");
7251             keylow = dst_surface->DestBltCKey.dwColorSpaceLowValue;
7252             keyhigh = dst_surface->DestBltCKey.dwColorSpaceHighValue;
7253         }
7254
7255 #define COPYBOX_COLORKEY(type) \
7256 do { \
7257     const type *s = (const type *)sbuf; \
7258     type *d = (type *)dbuf; \
7259     type tmp; \
7260     for (y = 0; y < h; y++) \
7261     { \
7262         for (x = 0; x < w; x++) \
7263         { \
7264             tmp = s[x]; \
7265             if ((tmp & mask) < keylow || (tmp & mask) > keyhigh) d[x] = tmp; \
7266         } \
7267         s = (const type *)((const BYTE *)s + slock.Pitch); \
7268         d = (type *)((BYTE *)d + dlock.Pitch); \
7269     } \
7270 } while(0)
7271
7272         switch (bpp)
7273         {
7274             case 1:
7275                 COPYBOX_COLORKEY(BYTE);
7276                 break;
7277             case 2:
7278                 COPYBOX_COLORKEY(WORD);
7279                 break;
7280             case 4:
7281                 COPYBOX_COLORKEY(DWORD);
7282                 break;
7283             case 3:
7284             {
7285                 const BYTE *s;
7286                 DWORD tmp;
7287                 BYTE *d;
7288                 s = sbuf;
7289                 d = dbuf;
7290                 for (y = 0; y < h; ++y)
7291                 {
7292                     for (x = 0; x < w * 3; x += 3)
7293                     {
7294                         tmp = (DWORD)s[x] + ((DWORD)s[x + 1] << 8) + ((DWORD)s[x + 2] << 16);
7295                         if (tmp < keylow || tmp > keyhigh)
7296                         {
7297                             d[x + 0] = s[x + 0];
7298                             d[x + 1] = s[x + 1];
7299                             d[x + 2] = s[x + 2];
7300                         }
7301                     }
7302                     s += slock.Pitch;
7303                     d += dlock.Pitch;
7304                 }
7305                 break;
7306             }
7307             default:
7308                 FIXME("Source color key blitting not supported for bpp %u.\n", bpp * 8);
7309                 hr = WINED3DERR_NOTAVAILABLE;
7310                 goto error;
7311         }
7312 #undef COPYBOX_COLORKEY
7313         TRACE("Copy done.\n");
7314     }
7315     else
7316     {
7317         int width = w * bpp;
7318         INT sbufpitch, dbufpitch;
7319
7320         TRACE("No color key copy.\n");
7321         /* Handle overlapping surfaces. */
7322         if (sbuf < dbuf)
7323         {
7324             sbuf += (h - 1) * slock.Pitch;
7325             dbuf += (h - 1) * dlock.Pitch;
7326             sbufpitch = -slock.Pitch;
7327             dbufpitch = -dlock.Pitch;
7328         }
7329         else
7330         {
7331             sbufpitch = slock.Pitch;
7332             dbufpitch = dlock.Pitch;
7333         }
7334         for (y = 0; y < h; ++y)
7335         {
7336             /* This is pretty easy, a line for line memcpy. */
7337             memmove(dbuf, sbuf, width);
7338             sbuf += sbufpitch;
7339             dbuf += dbufpitch;
7340         }
7341         TRACE("Copy done.\n");
7342     }
7343
7344 error:
7345     if (src_surface == dst_surface)
7346     {
7347         wined3d_surface_unmap(dst_surface);
7348     }
7349     else
7350     {
7351         wined3d_surface_unmap(dst_surface);
7352         wined3d_surface_unmap(src_surface);
7353     }
7354
7355     return hr;
7356 }
7357
7358 /* Do not call while under the GL lock. */
7359 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7360         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7361 {
7362     WINEDDBLTFX BltFx;
7363
7364     memset(&BltFx, 0, sizeof(BltFx));
7365     BltFx.dwSize = sizeof(BltFx);
7366     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface->resource.format, color);
7367     return wined3d_surface_blt(dst_surface, dst_rect, NULL, NULL,
7368             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7369 }
7370
7371 /* Do not call while under the GL lock. */
7372 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7373         struct wined3d_surface *surface, const RECT *rect, float depth)
7374 {
7375     FIXME("Depth filling not implemented by cpu_blit.\n");
7376     return WINED3DERR_INVALIDCALL;
7377 }
7378
7379 const struct blit_shader cpu_blit =  {
7380     cpu_blit_alloc,
7381     cpu_blit_free,
7382     cpu_blit_set,
7383     cpu_blit_unset,
7384     cpu_blit_supported,
7385     cpu_blit_color_fill,
7386     cpu_blit_depth_fill,
7387 };
7388
7389 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7390         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7391         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7392         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7393 {
7394     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7395     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7396     unsigned int resource_size;
7397     HRESULT hr;
7398
7399     if (multisample_quality > 0)
7400     {
7401         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7402         multisample_quality = 0;
7403     }
7404
7405     /* Quick lockable sanity check.
7406      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7407      * this function is too deep to need to care about things like this.
7408      * Levels need to be checked too, since they all affect what can be done. */
7409     switch (pool)
7410     {
7411         case WINED3DPOOL_SCRATCH:
7412             if (!lockable)
7413             {
7414                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7415                         "which are mutually exclusive, setting lockable to TRUE.\n");
7416                 lockable = TRUE;
7417             }
7418             break;
7419
7420         case WINED3DPOOL_SYSTEMMEM:
7421             if (!lockable)
7422                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7423             break;
7424
7425         case WINED3DPOOL_MANAGED:
7426             if (usage & WINED3DUSAGE_DYNAMIC)
7427                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7428             break;
7429
7430         case WINED3DPOOL_DEFAULT:
7431             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7432                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7433             break;
7434
7435         default:
7436             FIXME("Unknown pool %#x.\n", pool);
7437             break;
7438     };
7439
7440     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7441         FIXME("Trying to create a render target that isn't in the default pool.\n");
7442
7443     /* FIXME: Check that the format is supported by the device. */
7444
7445     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7446     if (!resource_size)
7447         return WINED3DERR_INVALIDCALL;
7448
7449     surface->surface_type = surface_type;
7450
7451     switch (surface_type)
7452     {
7453         case SURFACE_OPENGL:
7454             surface->surface_ops = &surface_ops;
7455             break;
7456
7457         case SURFACE_GDI:
7458             surface->surface_ops = &gdi_surface_ops;
7459             break;
7460
7461         default:
7462             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7463             return WINED3DERR_INVALIDCALL;
7464     }
7465
7466     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7467             multisample_type, multisample_quality, usage, pool, width, height, 1,
7468             resource_size, parent, parent_ops, &surface_resource_ops);
7469     if (FAILED(hr))
7470     {
7471         WARN("Failed to initialize resource, returning %#x.\n", hr);
7472         return hr;
7473     }
7474
7475     /* "Standalone" surface. */
7476     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7477
7478     surface->texture_level = level;
7479     list_init(&surface->overlays);
7480
7481     /* Flags */
7482     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7483     if (discard)
7484         surface->flags |= SFLAG_DISCARD;
7485     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7486         surface->flags |= SFLAG_LOCKABLE;
7487     /* I'm not sure if this qualifies as a hack or as an optimization. It
7488      * seems reasonable to assume that lockable render targets will get
7489      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7490      * creation. However, the other reason we want to do this is that several
7491      * ddraw applications access surface memory while the surface isn't
7492      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7493      * future locks prevents these from crashing. */
7494     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7495         surface->flags |= SFLAG_DYNLOCK;
7496
7497     /* Mark the texture as dirty so that it gets loaded first time around. */
7498     surface_add_dirty_rect(surface, NULL);
7499     list_init(&surface->renderbuffers);
7500
7501     TRACE("surface %p, memory %p, size %u\n",
7502             surface, surface->resource.allocatedMemory, surface->resource.size);
7503
7504     /* Call the private setup routine */
7505     hr = surface->surface_ops->surface_private_setup(surface);
7506     if (FAILED(hr))
7507     {
7508         ERR("Private setup failed, returning %#x\n", hr);
7509         surface->surface_ops->surface_cleanup(surface);
7510         return hr;
7511     }
7512
7513     return hr;
7514 }
7515
7516 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7517         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7518         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7519         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7520 {
7521     struct wined3d_surface *object;
7522     HRESULT hr;
7523
7524     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7525             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7526     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7527             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7528     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7529
7530     if (surface_type == SURFACE_OPENGL && !device->adapter)
7531     {
7532         ERR("OpenGL surfaces are not available without OpenGL.\n");
7533         return WINED3DERR_NOTAVAILABLE;
7534     }
7535
7536     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7537     if (!object)
7538     {
7539         ERR("Failed to allocate surface memory.\n");
7540         return WINED3DERR_OUTOFVIDEOMEMORY;
7541     }
7542
7543     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7544             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7545     if (FAILED(hr))
7546     {
7547         WARN("Failed to initialize surface, returning %#x.\n", hr);
7548         HeapFree(GetProcessHeap(), 0, object);
7549         return hr;
7550     }
7551
7552     TRACE("Created surface %p.\n", object);
7553     *surface = object;
7554
7555     return hr;
7556 }