wined3d: Add compressed surface support to surface_cpu_blt().
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     TRACE("surface %p.\n", surface);
46
47     if (surface->texture_name || (surface->flags & SFLAG_PBO) || !list_empty(&surface->renderbuffers))
48     {
49         struct wined3d_renderbuffer_entry *entry, *entry2;
50         const struct wined3d_gl_info *gl_info;
51         struct wined3d_context *context;
52
53         context = context_acquire(surface->resource.device, NULL);
54         gl_info = context->gl_info;
55
56         ENTER_GL();
57
58         if (surface->texture_name)
59         {
60             TRACE("Deleting texture %u.\n", surface->texture_name);
61             glDeleteTextures(1, &surface->texture_name);
62         }
63
64         if (surface->flags & SFLAG_PBO)
65         {
66             TRACE("Deleting PBO %u.\n", surface->pbo);
67             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
68         }
69
70         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
71         {
72             TRACE("Deleting renderbuffer %u.\n", entry->id);
73             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
74             HeapFree(GetProcessHeap(), 0, entry);
75         }
76
77         LEAVE_GL();
78
79         context_release(context);
80     }
81
82     if (surface->flags & SFLAG_DIBSECTION)
83     {
84         /* Release the DC. */
85         SelectObject(surface->hDC, surface->dib.holdbitmap);
86         DeleteDC(surface->hDC);
87         /* Release the DIB section. */
88         DeleteObject(surface->dib.DIBsection);
89         surface->dib.bitmap_data = NULL;
90         surface->resource.allocatedMemory = NULL;
91     }
92
93     if (surface->flags & SFLAG_USERPTR)
94         wined3d_surface_set_mem(surface, NULL);
95     if (surface->overlay_dest)
96         list_remove(&surface->overlay_entry);
97
98     HeapFree(GetProcessHeap(), 0, surface->palette9);
99
100     resource_cleanup(&surface->resource);
101 }
102
103 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
104 {
105     TRACE("surface %p, container %p.\n", surface, container);
106
107     if (!container && type != WINED3D_CONTAINER_NONE)
108         ERR("Setting NULL container of type %#x.\n", type);
109
110     if (type == WINED3D_CONTAINER_SWAPCHAIN)
111     {
112         surface->get_drawable_size = get_drawable_size_swapchain;
113     }
114     else
115     {
116         switch (wined3d_settings.offscreen_rendering_mode)
117         {
118             case ORM_FBO:
119                 surface->get_drawable_size = get_drawable_size_fbo;
120                 break;
121
122             case ORM_BACKBUFFER:
123                 surface->get_drawable_size = get_drawable_size_backbuffer;
124                 break;
125
126             default:
127                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
128                 return;
129         }
130     }
131
132     surface->container.type = type;
133     surface->container.u.base = container;
134 }
135
136 struct blt_info
137 {
138     GLenum binding;
139     GLenum bind_target;
140     enum tex_types tex_type;
141     GLfloat coords[4][3];
142 };
143
/* Floating point rectangle; members are left, top, right and bottom,
 * in that order. */
struct float_rect
{
    float l, t, r, b;
};
151
152 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
153 {
154     f->l = ((r->left * 2.0f) / w) - 1.0f;
155     f->t = ((r->top * 2.0f) / h) - 1.0f;
156     f->r = ((r->right * 2.0f) / w) - 1.0f;
157     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
158 }
159
160 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
161 {
162     GLfloat (*coords)[3] = info->coords;
163     struct float_rect f;
164
165     switch (target)
166     {
167         default:
168             FIXME("Unsupported texture target %#x\n", target);
169             /* Fall back to GL_TEXTURE_2D */
170         case GL_TEXTURE_2D:
171             info->binding = GL_TEXTURE_BINDING_2D;
172             info->bind_target = GL_TEXTURE_2D;
173             info->tex_type = tex_2d;
174             coords[0][0] = (float)rect->left / w;
175             coords[0][1] = (float)rect->top / h;
176             coords[0][2] = 0.0f;
177
178             coords[1][0] = (float)rect->right / w;
179             coords[1][1] = (float)rect->top / h;
180             coords[1][2] = 0.0f;
181
182             coords[2][0] = (float)rect->left / w;
183             coords[2][1] = (float)rect->bottom / h;
184             coords[2][2] = 0.0f;
185
186             coords[3][0] = (float)rect->right / w;
187             coords[3][1] = (float)rect->bottom / h;
188             coords[3][2] = 0.0f;
189             break;
190
191         case GL_TEXTURE_RECTANGLE_ARB:
192             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
193             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
194             info->tex_type = tex_rect;
195             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
196             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
197             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
198             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
199             break;
200
201         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
202             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
203             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
204             info->tex_type = tex_cube;
205             cube_coords_float(rect, w, h, &f);
206
207             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
208             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
209             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
210             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
211             break;
212
213         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
214             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
215             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
216             info->tex_type = tex_cube;
217             cube_coords_float(rect, w, h, &f);
218
219             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
220             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
221             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
222             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
223             break;
224
225         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
226             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
227             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
228             info->tex_type = tex_cube;
229             cube_coords_float(rect, w, h, &f);
230
231             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
232             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
233             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
234             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
235             break;
236
237         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
238             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
239             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
240             info->tex_type = tex_cube;
241             cube_coords_float(rect, w, h, &f);
242
243             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
244             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
245             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
246             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
247             break;
248
249         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
250             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
251             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
252             info->tex_type = tex_cube;
253             cube_coords_float(rect, w, h, &f);
254
255             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
256             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
257             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
258             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
259             break;
260
261         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
262             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
263             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
264             info->tex_type = tex_cube;
265             cube_coords_float(rect, w, h, &f);
266
267             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
268             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
269             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
270             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
271             break;
272     }
273 }
274
275 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
276 {
277     if (rect_in)
278         *rect_out = *rect_in;
279     else
280     {
281         rect_out->left = 0;
282         rect_out->top = 0;
283         rect_out->right = surface->resource.width;
284         rect_out->bottom = surface->resource.height;
285     }
286 }
287
288 /* GL locking and context activation is done by the caller */
289 void draw_textured_quad(const struct wined3d_surface *src_surface, const RECT *src_rect,
290         const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
291 {
292     struct blt_info info;
293
294     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
295
296     glEnable(info.bind_target);
297     checkGLcall("glEnable(bind_target)");
298
299     /* Bind the texture */
300     glBindTexture(info.bind_target, src_surface->texture_name);
301     checkGLcall("glBindTexture");
302
303     /* Filtering for StretchRect */
304     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
305             wined3d_gl_mag_filter(magLookup, Filter));
306     checkGLcall("glTexParameteri");
307     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
308             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
309     checkGLcall("glTexParameteri");
310     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
311     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
312     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
313     checkGLcall("glTexEnvi");
314
315     /* Draw a quad */
316     glBegin(GL_TRIANGLE_STRIP);
317     glTexCoord3fv(info.coords[0]);
318     glVertex2i(dst_rect->left, dst_rect->top);
319
320     glTexCoord3fv(info.coords[1]);
321     glVertex2i(dst_rect->right, dst_rect->top);
322
323     glTexCoord3fv(info.coords[2]);
324     glVertex2i(dst_rect->left, dst_rect->bottom);
325
326     glTexCoord3fv(info.coords[3]);
327     glVertex2i(dst_rect->right, dst_rect->bottom);
328     glEnd();
329
330     /* Unbind the texture */
331     glBindTexture(info.bind_target, 0);
332     checkGLcall("glBindTexture(info->bind_target, 0)");
333
334     /* We changed the filtering settings on the texture. Inform the
335      * container about this to get the filters reset properly next draw. */
336     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
337     {
338         struct wined3d_texture *texture = src_surface->container.u.texture;
339         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
340         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
341         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
342     }
343 }
344
345 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
346 {
347     const struct wined3d_format *format = surface->resource.format;
348     SYSTEM_INFO sysInfo;
349     BITMAPINFO *b_info;
350     int extraline = 0;
351     DWORD *masks;
352     UINT usage;
353     HDC dc;
354
355     TRACE("surface %p.\n", surface);
356
357     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
358     {
359         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
360         return WINED3DERR_INVALIDCALL;
361     }
362
363     switch (format->byte_count)
364     {
365         case 2:
366         case 4:
367             /* Allocate extra space to store the RGB bit masks. */
368             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
369             break;
370
371         case 3:
372             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
373             break;
374
375         default:
376             /* Allocate extra space for a palette. */
377             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
378                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
379             break;
380     }
381
382     if (!b_info)
383         return E_OUTOFMEMORY;
384
385     /* Some applications access the surface in via DWORDs, and do not take
386      * the necessary care at the end of the surface. So we need at least
387      * 4 extra bytes at the end of the surface. Check against the page size,
388      * if the last page used for the surface has at least 4 spare bytes we're
389      * safe, otherwise add an extra line to the DIB section. */
390     GetSystemInfo(&sysInfo);
391     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
392     {
393         extraline = 1;
394         TRACE("Adding an extra line to the DIB section.\n");
395     }
396
397     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
398     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
399     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
400     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
401     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
402             * wined3d_surface_get_pitch(surface);
403     b_info->bmiHeader.biPlanes = 1;
404     b_info->bmiHeader.biBitCount = format->byte_count * 8;
405
406     b_info->bmiHeader.biXPelsPerMeter = 0;
407     b_info->bmiHeader.biYPelsPerMeter = 0;
408     b_info->bmiHeader.biClrUsed = 0;
409     b_info->bmiHeader.biClrImportant = 0;
410
411     /* Get the bit masks */
412     masks = (DWORD *)b_info->bmiColors;
413     switch (surface->resource.format->id)
414     {
415         case WINED3DFMT_B8G8R8_UNORM:
416             usage = DIB_RGB_COLORS;
417             b_info->bmiHeader.biCompression = BI_RGB;
418             break;
419
420         case WINED3DFMT_B5G5R5X1_UNORM:
421         case WINED3DFMT_B5G5R5A1_UNORM:
422         case WINED3DFMT_B4G4R4A4_UNORM:
423         case WINED3DFMT_B4G4R4X4_UNORM:
424         case WINED3DFMT_B2G3R3_UNORM:
425         case WINED3DFMT_B2G3R3A8_UNORM:
426         case WINED3DFMT_R10G10B10A2_UNORM:
427         case WINED3DFMT_R8G8B8A8_UNORM:
428         case WINED3DFMT_R8G8B8X8_UNORM:
429         case WINED3DFMT_B10G10R10A2_UNORM:
430         case WINED3DFMT_B5G6R5_UNORM:
431         case WINED3DFMT_R16G16B16A16_UNORM:
432             usage = 0;
433             b_info->bmiHeader.biCompression = BI_BITFIELDS;
434             masks[0] = format->red_mask;
435             masks[1] = format->green_mask;
436             masks[2] = format->blue_mask;
437             break;
438
439         default:
440             /* Don't know palette */
441             b_info->bmiHeader.biCompression = BI_RGB;
442             usage = 0;
443             break;
444     }
445
446     if (!(dc = GetDC(0)))
447     {
448         HeapFree(GetProcessHeap(), 0, b_info);
449         return HRESULT_FROM_WIN32(GetLastError());
450     }
451
452     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
453             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
454             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
455     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
456     ReleaseDC(0, dc);
457
458     if (!surface->dib.DIBsection)
459     {
460         ERR("Failed to create DIB section.\n");
461         HeapFree(GetProcessHeap(), 0, b_info);
462         return HRESULT_FROM_WIN32(GetLastError());
463     }
464
465     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
466     /* Copy the existing surface to the dib section. */
467     if (surface->resource.allocatedMemory)
468     {
469         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
470                 surface->resource.height * wined3d_surface_get_pitch(surface));
471     }
472     else
473     {
474         /* This is to make maps read the GL texture although memory is allocated. */
475         surface->flags &= ~SFLAG_INSYSMEM;
476     }
477     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
478
479     HeapFree(GetProcessHeap(), 0, b_info);
480
481     /* Now allocate a DC. */
482     surface->hDC = CreateCompatibleDC(0);
483     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
484     TRACE("Using wined3d palette %p.\n", surface->palette);
485     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
486
487     surface->flags |= SFLAG_DIBSECTION;
488
489     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
490     surface->resource.heapMemory = NULL;
491
492     return WINED3D_OK;
493 }
494
495 static void surface_prepare_system_memory(struct wined3d_surface *surface)
496 {
497     struct wined3d_device *device = surface->resource.device;
498     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
499
500     TRACE("surface %p.\n", surface);
501
502     /* Performance optimization: Count how often a surface is locked, if it is
503      * locked regularly do not throw away the system memory copy. This avoids
504      * the need to download the surface from OpenGL all the time. The surface
505      * is still downloaded if the OpenGL texture is changed. */
506     if (!(surface->flags & SFLAG_DYNLOCK))
507     {
508         if (++surface->lockCount > MAXLOCKCOUNT)
509         {
510             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
511             surface->flags |= SFLAG_DYNLOCK;
512         }
513     }
514
515     /* Create a PBO for dynamically locked surfaces but don't do it for
516      * converted or NPOT surfaces. Also don't create a PBO for systemmem
517      * surfaces. */
518     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
519             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
520             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
521     {
522         struct wined3d_context *context;
523         GLenum error;
524
525         context = context_acquire(device, NULL);
526         ENTER_GL();
527
528         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
529         error = glGetError();
530         if (!surface->pbo || error != GL_NO_ERROR)
531             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
532
533         TRACE("Binding PBO %u.\n", surface->pbo);
534
535         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
536         checkGLcall("glBindBufferARB");
537
538         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
539                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
540         checkGLcall("glBufferDataARB");
541
542         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
543         checkGLcall("glBindBufferARB");
544
545         /* We don't need the system memory anymore and we can't even use it for PBOs. */
546         if (!(surface->flags & SFLAG_CLIENT))
547         {
548             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
549             surface->resource.heapMemory = NULL;
550         }
551         surface->resource.allocatedMemory = NULL;
552         surface->flags |= SFLAG_PBO;
553         LEAVE_GL();
554         context_release(context);
555     }
556     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
557     {
558         /* Whatever surface we have, make sure that there is memory allocated
559          * for the downloaded copy, or a PBO to map. */
560         if (!surface->resource.heapMemory)
561             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
562
563         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
564                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
565
566         if (surface->flags & SFLAG_INSYSMEM)
567             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
568     }
569 }
570
571 static void surface_evict_sysmem(struct wined3d_surface *surface)
572 {
573     if (surface->flags & SFLAG_DONOTFREE)
574         return;
575
576     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
577     surface->resource.allocatedMemory = NULL;
578     surface->resource.heapMemory = NULL;
579     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
580 }
581
582 /* Context activation is done by the caller. */
583 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
584         const struct wined3d_gl_info *gl_info, BOOL srgb)
585 {
586     struct wined3d_device *device = surface->resource.device;
587     DWORD active_sampler;
588     GLint active_texture;
589
590     /* We don't need a specific texture unit, but after binding the texture
591      * the current unit is dirty. Read the unit back instead of switching to
592      * 0, this avoids messing around with the state manager's GL states. The
593      * current texture unit should always be a valid one.
594      *
595      * To be more specific, this is tricky because we can implicitly be
596      * called from sampler() in state.c. This means we can't touch anything
597      * other than whatever happens to be the currently active texture, or we
598      * would risk marking already applied sampler states dirty again.
599      *
600      * TODO: Track the current active texture per GL context instead of using
601      * glGet(). */
602
603     ENTER_GL();
604     glGetIntegerv(GL_ACTIVE_TEXTURE, &active_texture);
605     LEAVE_GL();
606     active_sampler = device->rev_tex_unit_map[active_texture - GL_TEXTURE0_ARB];
607
608     if (active_sampler != WINED3D_UNMAPPED_STAGE)
609         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
610     surface_bind(surface, gl_info, srgb);
611 }
612
613 static void surface_force_reload(struct wined3d_surface *surface)
614 {
615     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
616 }
617
618 static void surface_release_client_storage(struct wined3d_surface *surface)
619 {
620     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
621
622     ENTER_GL();
623     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
624     if (surface->texture_name)
625     {
626         surface_bind_and_dirtify(surface, context->gl_info, FALSE);
627         glTexImage2D(surface->texture_target, surface->texture_level,
628                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
629     }
630     if (surface->texture_name_srgb)
631     {
632         surface_bind_and_dirtify(surface, context->gl_info, TRUE);
633         glTexImage2D(surface->texture_target, surface->texture_level,
634                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
635     }
636     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
637     LEAVE_GL();
638
639     context_release(context);
640
641     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
642     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
643     surface_force_reload(surface);
644 }
645
646 static HRESULT surface_private_setup(struct wined3d_surface *surface)
647 {
648     /* TODO: Check against the maximum texture sizes supported by the video card. */
649     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
650     unsigned int pow2Width, pow2Height;
651
652     TRACE("surface %p.\n", surface);
653
654     surface->texture_name = 0;
655     surface->texture_target = GL_TEXTURE_2D;
656
657     /* Non-power2 support */
658     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
659     {
660         pow2Width = surface->resource.width;
661         pow2Height = surface->resource.height;
662     }
663     else
664     {
665         /* Find the nearest pow2 match */
666         pow2Width = pow2Height = 1;
667         while (pow2Width < surface->resource.width)
668             pow2Width <<= 1;
669         while (pow2Height < surface->resource.height)
670             pow2Height <<= 1;
671     }
672     surface->pow2Width = pow2Width;
673     surface->pow2Height = pow2Height;
674
675     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
676     {
677         /* TODO: Add support for non power two compressed textures. */
678         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
679         {
680             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
681                   surface, surface->resource.width, surface->resource.height);
682             return WINED3DERR_NOTAVAILABLE;
683         }
684     }
685
686     if (pow2Width != surface->resource.width
687             || pow2Height != surface->resource.height)
688     {
689         surface->flags |= SFLAG_NONPOW2;
690     }
691
692     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
693             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
694     {
695         /* One of three options:
696          * 1: Do the same as we do with NPOT and scale the texture, (any
697          *    texture ops would require the texture to be scaled which is
698          *    potentially slow)
699          * 2: Set the texture to the maximum size (bad idea).
700          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
701          * 4: Create the surface, but allow it to be used only for DirectDraw
702          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
703          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
704          *    the render target. */
705         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
706         {
707             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
708             return WINED3DERR_NOTAVAILABLE;
709         }
710
711         /* We should never use this surface in combination with OpenGL! */
712         TRACE("Creating an oversized surface: %ux%u.\n",
713                 surface->pow2Width, surface->pow2Height);
714     }
715     else
716     {
717         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
718          * and EXT_PALETTED_TEXTURE is used in combination with texture
719          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
720          * EXT_PALETTED_TEXTURE doesn't work in combination with
721          * ARB_TEXTURE_RECTANGLE. */
722         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
723                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
724                 && gl_info->supported[EXT_PALETTED_TEXTURE]
725                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
726         {
727             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
728             surface->pow2Width = surface->resource.width;
729             surface->pow2Height = surface->resource.height;
730             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
731         }
732     }
733
734     switch (wined3d_settings.offscreen_rendering_mode)
735     {
736         case ORM_FBO:
737             surface->get_drawable_size = get_drawable_size_fbo;
738             break;
739
740         case ORM_BACKBUFFER:
741             surface->get_drawable_size = get_drawable_size_backbuffer;
742             break;
743
744         default:
745             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
746             return WINED3DERR_INVALIDCALL;
747     }
748
749     surface->flags |= SFLAG_INSYSMEM;
750
751     return WINED3D_OK;
752 }
753
754 static void surface_realize_palette(struct wined3d_surface *surface)
755 {
756     struct wined3d_palette *palette = surface->palette;
757
758     TRACE("surface %p.\n", surface);
759
760     if (!palette) return;
761
762     if (surface->resource.format->id == WINED3DFMT_P8_UINT
763             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
764     {
765         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
766         {
767             /* Make sure the texture is up to date. This call doesn't do
768              * anything if the texture is already up to date. */
769             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
770
771             /* We want to force a palette refresh, so mark the drawable as not being up to date */
772             if (!surface_is_offscreen(surface))
773                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
774         }
775         else
776         {
777             if (!(surface->flags & SFLAG_INSYSMEM))
778             {
779                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
780                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
781             }
782             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
783         }
784     }
785
786     if (surface->flags & SFLAG_DIBSECTION)
787     {
788         RGBQUAD col[256];
789         unsigned int i;
790
791         TRACE("Updating the DC's palette.\n");
792
793         for (i = 0; i < 256; ++i)
794         {
795             col[i].rgbRed   = palette->palents[i].peRed;
796             col[i].rgbGreen = palette->palents[i].peGreen;
797             col[i].rgbBlue  = palette->palents[i].peBlue;
798             col[i].rgbReserved = 0;
799         }
800         SetDIBColorTable(surface->hDC, 0, 256, col);
801     }
802
803     /* Propagate the changes to the drawable when we have a palette. */
804     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
805         surface_load_location(surface, SFLAG_INDRAWABLE, NULL);
806 }
807
808 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
809 {
810     HRESULT hr;
811
812     /* If there's no destination surface there is nothing to do. */
813     if (!surface->overlay_dest)
814         return WINED3D_OK;
815
816     /* Blt calls ModifyLocation on the dest surface, which in turn calls
817      * DrawOverlay to update the overlay. Prevent an endless recursion. */
818     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
819         return WINED3D_OK;
820
821     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
822     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
823             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
824     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
825
826     return hr;
827 }
828
829 static void surface_preload(struct wined3d_surface *surface)
830 {
831     TRACE("surface %p.\n", surface);
832
833     surface_internal_preload(surface, SRGB_ANY);
834 }
835
836 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
837 {
838     struct wined3d_device *device = surface->resource.device;
839     const RECT *pass_rect = rect;
840
841     TRACE("surface %p, rect %s, flags %#x.\n",
842             surface, wine_dbgstr_rect(rect), flags);
843
844     if (flags & WINED3DLOCK_DISCARD)
845     {
846         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
847         surface_prepare_system_memory(surface);
848         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
849     }
850     else
851     {
852         /* surface_load_location() does not check if the rectangle specifies
853          * the full surface. Most callers don't need that, so do it here. */
854         if (rect && !rect->top && !rect->left
855                 && rect->right == surface->resource.width
856                 && rect->bottom == surface->resource.height)
857             pass_rect = NULL;
858
859         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
860                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
861                 || surface == device->fb.render_targets[0])))
862             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
863     }
864
865     if (surface->flags & SFLAG_PBO)
866     {
867         const struct wined3d_gl_info *gl_info;
868         struct wined3d_context *context;
869
870         context = context_acquire(device, NULL);
871         gl_info = context->gl_info;
872
873         ENTER_GL();
874         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
875         checkGLcall("glBindBufferARB");
876
877         /* This shouldn't happen but could occur if some other function
878          * didn't handle the PBO properly. */
879         if (surface->resource.allocatedMemory)
880             ERR("The surface already has PBO memory allocated.\n");
881
882         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
883         checkGLcall("glMapBufferARB");
884
885         /* Make sure the PBO isn't set anymore in order not to break non-PBO
886          * calls. */
887         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
888         checkGLcall("glBindBufferARB");
889
890         LEAVE_GL();
891         context_release(context);
892     }
893
894     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
895     {
896         if (!rect)
897             surface_add_dirty_rect(surface, NULL);
898         else
899         {
900             WINED3DBOX b;
901
902             b.Left = rect->left;
903             b.Top = rect->top;
904             b.Right = rect->right;
905             b.Bottom = rect->bottom;
906             b.Front = 0;
907             b.Back = 1;
908             surface_add_dirty_rect(surface, &b);
909         }
910     }
911 }
912
913 static void surface_unmap(struct wined3d_surface *surface)
914 {
915     struct wined3d_device *device = surface->resource.device;
916     BOOL fullsurface;
917
918     TRACE("surface %p.\n", surface);
919
920     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
921
922     if (surface->flags & SFLAG_PBO)
923     {
924         const struct wined3d_gl_info *gl_info;
925         struct wined3d_context *context;
926
927         TRACE("Freeing PBO memory.\n");
928
929         context = context_acquire(device, NULL);
930         gl_info = context->gl_info;
931
932         ENTER_GL();
933         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
934         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
935         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
936         checkGLcall("glUnmapBufferARB");
937         LEAVE_GL();
938         context_release(context);
939
940         surface->resource.allocatedMemory = NULL;
941     }
942
943     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
944
945     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
946     {
947         TRACE("Not dirtified, nothing to do.\n");
948         goto done;
949     }
950
951     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
952             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
953     {
954         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
955         {
956             static BOOL warned = FALSE;
957             if (!warned)
958             {
959                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
960                 warned = TRUE;
961             }
962             goto done;
963         }
964
965         if (!surface->dirtyRect.left && !surface->dirtyRect.top
966                 && surface->dirtyRect.right == surface->resource.width
967                 && surface->dirtyRect.bottom == surface->resource.height)
968         {
969             fullsurface = TRUE;
970         }
971         else
972         {
973             /* TODO: Proper partial rectangle tracking. */
974             fullsurface = FALSE;
975             surface->flags |= SFLAG_INSYSMEM;
976         }
977
978         surface_load_location(surface, SFLAG_INDRAWABLE, fullsurface ? NULL : &surface->dirtyRect);
979
980         /* Partial rectangle tracking is not commonly implemented, it is only
981          * done for render targets. INSYSMEM was set before to tell
982          * surface_load_location() where to read the rectangle from.
983          * Indrawable is set because all modifications from the partial
984          * sysmem copy are written back to the drawable, thus the surface is
985          * merged again in the drawable. The sysmem copy is not fully up to
986          * date because only a subrectangle was read in Map(). */
987         if (!fullsurface)
988         {
989             surface_modify_location(surface, SFLAG_INDRAWABLE, TRUE);
990             surface_evict_sysmem(surface);
991         }
992
993         surface->dirtyRect.left = surface->resource.width;
994         surface->dirtyRect.top = surface->resource.height;
995         surface->dirtyRect.right = 0;
996         surface->dirtyRect.bottom = 0;
997     }
998     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
999     {
1000         FIXME("Depth / stencil buffer locking is not implemented.\n");
1001     }
1002
1003 done:
1004     /* Overlays have to be redrawn manually after changes with the GL implementation */
1005     if (surface->overlay_dest)
1006         surface->surface_ops->surface_draw_overlay(surface);
1007 }
1008
1009 static HRESULT surface_getdc(struct wined3d_surface *surface)
1010 {
1011     WINED3DLOCKED_RECT lock;
1012     HRESULT hr;
1013
1014     TRACE("surface %p.\n", surface);
1015
1016     /* Create a DIB section if there isn't a dc yet. */
1017     if (!surface->hDC)
1018     {
1019         if (surface->flags & SFLAG_CLIENT)
1020         {
1021             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1022             surface_release_client_storage(surface);
1023         }
1024         hr = surface_create_dib_section(surface);
1025         if (FAILED(hr))
1026             return WINED3DERR_INVALIDCALL;
1027
1028         /* Use the DIB section from now on if we are not using a PBO. */
1029         if (!(surface->flags & SFLAG_PBO))
1030             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1031     }
1032
1033     /* Map the surface. */
1034     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1035     if (FAILED(hr))
1036         ERR("Map failed, hr %#x.\n", hr);
1037
1038     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1039      * activates the allocatedMemory. */
1040     if (surface->flags & SFLAG_PBO)
1041         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->dib.bitmap_size);
1042
1043     return hr;
1044 }
1045
1046 static HRESULT surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1047 {
1048     TRACE("surface %p, override %p.\n", surface, override);
1049
1050     /* Flipping is only supported on render targets and overlays. */
1051     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
1052     {
1053         WARN("Tried to flip a non-render target, non-overlay surface.\n");
1054         return WINEDDERR_NOTFLIPPABLE;
1055     }
1056
1057     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1058     {
1059         flip_surface(surface, override);
1060
1061         /* Update the overlay if it is visible */
1062         if (surface->overlay_dest)
1063             return surface->surface_ops->surface_draw_overlay(surface);
1064         else
1065             return WINED3D_OK;
1066     }
1067
1068     return WINED3D_OK;
1069 }
1070
1071 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1072 {
1073     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1074         return FALSE;
1075     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1076         return FALSE;
1077     return TRUE;
1078 }
1079
1080 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1081         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1082 {
1083     const struct wined3d_gl_info *gl_info;
1084     struct wined3d_context *context;
1085     DWORD src_mask, dst_mask;
1086     GLbitfield gl_mask;
1087
1088     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1089             device, src_surface, wine_dbgstr_rect(src_rect),
1090             dst_surface, wine_dbgstr_rect(dst_rect));
1091
1092     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1093     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1094
1095     if (src_mask != dst_mask)
1096     {
1097         ERR("Incompatible formats %s and %s.\n",
1098                 debug_d3dformat(src_surface->resource.format->id),
1099                 debug_d3dformat(dst_surface->resource.format->id));
1100         return;
1101     }
1102
1103     if (!src_mask)
1104     {
1105         ERR("Not a depth / stencil format: %s.\n",
1106                 debug_d3dformat(src_surface->resource.format->id));
1107         return;
1108     }
1109
1110     gl_mask = 0;
1111     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1112         gl_mask |= GL_DEPTH_BUFFER_BIT;
1113     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1114         gl_mask |= GL_STENCIL_BUFFER_BIT;
1115
1116     /* Make sure the locations are up-to-date. Loading the destination
1117      * surface isn't required if the entire surface is overwritten. */
1118     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1119     if (!surface_is_full_rect(dst_surface, dst_rect))
1120         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1121
1122     context = context_acquire(device, NULL);
1123     if (!context->valid)
1124     {
1125         context_release(context);
1126         WARN("Invalid context, skipping blit.\n");
1127         return;
1128     }
1129
1130     gl_info = context->gl_info;
1131
1132     ENTER_GL();
1133
1134     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1135     glReadBuffer(GL_NONE);
1136     checkGLcall("glReadBuffer()");
1137     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1138
1139     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1140     context_set_draw_buffer(context, GL_NONE);
1141     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1142
1143     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1144     {
1145         glDepthMask(GL_TRUE);
1146         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1147     }
1148     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1149     {
1150         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1151         {
1152             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1153             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1154         }
1155         glStencilMask(~0U);
1156         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1157     }
1158
1159     glDisable(GL_SCISSOR_TEST);
1160     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1161
1162     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1163             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1164     checkGLcall("glBlitFramebuffer()");
1165
1166     LEAVE_GL();
1167
1168     if (wined3d_settings.strict_draw_ordering)
1169         wglFlush(); /* Flush to ensure ordering across contexts. */
1170
1171     context_release(context);
1172 }
1173
1174 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1175         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1176         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1177 {
1178     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1179         return FALSE;
1180
1181     /* Source and/or destination need to be on the GL side */
1182     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1183         return FALSE;
1184
1185     switch (blit_op)
1186     {
1187         case WINED3D_BLIT_OP_COLOR_BLIT:
1188             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1189                 return FALSE;
1190             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1191                 return FALSE;
1192             break;
1193
1194         case WINED3D_BLIT_OP_DEPTH_BLIT:
1195             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1196                 return FALSE;
1197             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1198                 return FALSE;
1199             break;
1200
1201         default:
1202             return FALSE;
1203     }
1204
1205     if (!(src_format->id == dst_format->id
1206             || (is_identity_fixup(src_format->color_fixup)
1207             && is_identity_fixup(dst_format->color_fixup))))
1208         return FALSE;
1209
1210     return TRUE;
1211 }
1212
1213 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1214 {
1215     const struct wined3d_format *format = surface->resource.format;
1216
1217     switch (format->id)
1218     {
1219         case WINED3DFMT_S1_UINT_D15_UNORM:
1220             *float_depth = depth / (float)0x00007fff;
1221             break;
1222
1223         case WINED3DFMT_D16_UNORM:
1224             *float_depth = depth / (float)0x0000ffff;
1225             break;
1226
1227         case WINED3DFMT_D24_UNORM_S8_UINT:
1228         case WINED3DFMT_X8D24_UNORM:
1229             *float_depth = depth / (float)0x00ffffff;
1230             break;
1231
1232         case WINED3DFMT_D32_UNORM:
1233             *float_depth = depth / (float)0xffffffff;
1234             break;
1235
1236         default:
1237             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1238             return FALSE;
1239     }
1240
1241     return TRUE;
1242 }
1243
1244 /* Do not call while under the GL lock. */
1245 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1246 {
1247     const struct wined3d_resource *resource = &surface->resource;
1248     struct wined3d_device *device = resource->device;
1249     const struct blit_shader *blitter;
1250
1251     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1252             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1253     if (!blitter)
1254     {
1255         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1256         return WINED3DERR_INVALIDCALL;
1257     }
1258
1259     return blitter->depth_fill(device, surface, rect, depth);
1260 }
1261
1262 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1263         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1264 {
1265     struct wined3d_device *device = src_surface->resource.device;
1266
1267     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1268             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1269             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1270         return WINED3DERR_INVALIDCALL;
1271
1272     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1273
1274     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1275             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1276     surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
1277
1278     return WINED3D_OK;
1279 }
1280
1281 /* Do not call while under the GL lock. */
1282 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1283         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1284         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1285 {
1286     const struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1287     struct wined3d_device *device = dst_surface->resource.device;
1288     DWORD src_ds_flags, dst_ds_flags;
1289     RECT src_rect, dst_rect;
1290
1291     static const DWORD simple_blit = WINEDDBLT_ASYNC
1292             | WINEDDBLT_COLORFILL
1293             | WINEDDBLT_WAIT
1294             | WINEDDBLT_DEPTHFILL
1295             | WINEDDBLT_DONOTWAIT;
1296
1297     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1298             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1299             flags, fx, debug_d3dtexturefiltertype(filter));
1300     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1301
1302     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1303     {
1304         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1305         return WINEDDERR_SURFACEBUSY;
1306     }
1307
1308     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1309
1310     /* The destination rect can be out of bounds on the condition
1311      * that a clipper is set for the surface. */
1312     if (!dst_surface->clipper && (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1313             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1314             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1315             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1316             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0))
1317     {
1318         WARN("Application gave us bad destination rectangle for blit without a clipper set.\n");
1319         return WINEDDERR_INVALIDRECT;
1320     }
1321
1322     if (src_surface)
1323     {
1324         surface_get_rect(src_surface, src_rect_in, &src_rect);
1325
1326         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1327                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1328                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1329                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1330                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1331         {
1332             WARN("Application gave us bad source rectangle for Blt.\n");
1333             return WINEDDERR_INVALIDRECT;
1334         }
1335     }
1336     else
1337     {
1338         memset(&src_rect, 0, sizeof(src_rect));
1339     }
1340
1341     if (!fx || !(fx->dwDDFX))
1342         flags &= ~WINEDDBLT_DDFX;
1343
1344     if (flags & WINEDDBLT_WAIT)
1345         flags &= ~WINEDDBLT_WAIT;
1346
1347     if (flags & WINEDDBLT_ASYNC)
1348     {
1349         static unsigned int once;
1350
1351         if (!once++)
1352             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1353         flags &= ~WINEDDBLT_ASYNC;
1354     }
1355
1356     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1357     if (flags & WINEDDBLT_DONOTWAIT)
1358     {
1359         static unsigned int once;
1360
1361         if (!once++)
1362             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1363         flags &= ~WINEDDBLT_DONOTWAIT;
1364     }
1365
1366     if (!device->d3d_initialized)
1367     {
1368         WARN("D3D not initialized, using fallback.\n");
1369         goto cpu;
1370     }
1371
1372     if (flags & ~simple_blit)
1373     {
1374         WARN("Using fallback for complex blit (%#x).\n", flags);
1375         goto fallback;
1376     }
1377
1378     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1379         src_swapchain = src_surface->container.u.swapchain;
1380     else
1381         src_swapchain = NULL;
1382
1383     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1384         dst_swapchain = dst_surface->container.u.swapchain;
1385     else
1386         dst_swapchain = NULL;
1387
1388     /* This isn't strictly needed. FBO blits for example could deal with
1389      * cross-swapchain blits by first downloading the source to a texture
1390      * before switching to the destination context. We just have this here to
1391      * not have to deal with the issue, since cross-swapchain blits should be
1392      * rare. */
1393     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1394     {
1395         FIXME("Using fallback for cross-swapchain blit.\n");
1396         goto fallback;
1397     }
1398
1399     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1400     if (src_surface)
1401         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1402     else
1403         src_ds_flags = 0;
1404
1405     if (src_ds_flags || dst_ds_flags)
1406     {
1407         if (flags & WINEDDBLT_DEPTHFILL)
1408         {
1409             float depth;
1410
1411             TRACE("Depth fill.\n");
1412
1413             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1414                 return WINED3DERR_INVALIDCALL;
1415
1416             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1417                 return WINED3D_OK;
1418         }
1419         else
1420         {
1421             /* Accessing depth / stencil surfaces is supposed to fail while in
1422              * a scene, except for fills, which seem to work. */
1423             if (device->inScene)
1424             {
1425                 WARN("Rejecting depth / stencil access while in scene.\n");
1426                 return WINED3DERR_INVALIDCALL;
1427             }
1428
1429             if (src_ds_flags != dst_ds_flags)
1430             {
1431                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1432                 return WINED3DERR_INVALIDCALL;
1433             }
1434
1435             if (src_rect.top || src_rect.left
1436                     || src_rect.bottom != src_surface->resource.height
1437                     || src_rect.right != src_surface->resource.width)
1438             {
1439                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1440                         wine_dbgstr_rect(&src_rect));
1441                 return WINED3DERR_INVALIDCALL;
1442             }
1443
1444             if (dst_rect.top || dst_rect.left
1445                     || dst_rect.bottom != dst_surface->resource.height
1446                     || dst_rect.right != dst_surface->resource.width)
1447             {
1448                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1449                         wine_dbgstr_rect(&src_rect));
1450                 return WINED3DERR_INVALIDCALL;
1451             }
1452
1453             if (src_surface->resource.height != dst_surface->resource.height
1454                     || src_surface->resource.width != dst_surface->resource.width)
1455             {
1456                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1457                 return WINED3DERR_INVALIDCALL;
1458             }
1459
1460             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1461                 return WINED3D_OK;
1462         }
1463     }
1464
1465 fallback:
1466
1467     /* Special cases for render targets. */
1468     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1469             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1470     {
1471         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1472                 src_surface, &src_rect, flags, fx, filter)))
1473             return WINED3D_OK;
1474     }
1475
1476 cpu:
1477
1478     /* For the rest call the X11 surface implementation. For render targets
1479      * this should be implemented OpenGL accelerated in BltOverride, other
1480      * blits are rather rare. */
1481     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1482 }
1483
1484 /* Do not call while under the GL lock. */
1485 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1486         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1487 {
1488     RECT src_rect, dst_rect;
1489     DWORD flags = 0;
1490
1491     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect_in %s, trans %#x.\n",
1492             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1493
1494     surface_get_rect(src_surface, src_rect_in, &src_rect);
1495
1496     dst_rect.left = dst_x;
1497     dst_rect.top = dst_y;
1498     dst_rect.right = dst_x + src_rect.right - src_rect.left;
1499     dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1500
1501     if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1502         flags |= WINEDDBLT_KEYSRC;
1503     if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1504         flags |= WINEDDBLT_KEYDEST;
1505     if (trans & WINEDDBLTFAST_WAIT)
1506         flags |= WINEDDBLT_WAIT;
1507     if (trans & WINEDDBLTFAST_DONOTWAIT)
1508         flags |= WINEDDBLT_DONOTWAIT;
1509
1510     return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT);
1511 }
1512
1513 static HRESULT surface_set_mem(struct wined3d_surface *surface, void *mem)
1514 {
1515     TRACE("surface %p, mem %p.\n", surface, mem);
1516
1517     if (mem && mem != surface->resource.allocatedMemory)
1518     {
1519         void *release = NULL;
1520
1521         /* Do I have to copy the old surface content? */
1522         if (surface->flags & SFLAG_DIBSECTION)
1523         {
1524             SelectObject(surface->hDC, surface->dib.holdbitmap);
1525             DeleteDC(surface->hDC);
1526             /* Release the DIB section. */
1527             DeleteObject(surface->dib.DIBsection);
1528             surface->dib.bitmap_data = NULL;
1529             surface->resource.allocatedMemory = NULL;
1530             surface->hDC = NULL;
1531             surface->flags &= ~SFLAG_DIBSECTION;
1532         }
1533         else if (!(surface->flags & SFLAG_USERPTR))
1534         {
1535             release = surface->resource.heapMemory;
1536             surface->resource.heapMemory = NULL;
1537         }
1538         surface->resource.allocatedMemory = mem;
1539         surface->flags |= SFLAG_USERPTR;
1540
1541         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
1542         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1543
1544         /* For client textures OpenGL has to be notified. */
1545         if (surface->flags & SFLAG_CLIENT)
1546             surface_release_client_storage(surface);
1547
1548         /* Now free the old memory if any. */
1549         HeapFree(GetProcessHeap(), 0, release);
1550     }
1551     else if (surface->flags & SFLAG_USERPTR)
1552     {
1553         /* Map and GetDC will re-create the dib section and allocated memory. */
1554         surface->resource.allocatedMemory = NULL;
1555         /* HeapMemory should be NULL already. */
1556         if (surface->resource.heapMemory)
1557             ERR("User pointer surface has heap memory allocated.\n");
1558         surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
1559
1560         if (surface->flags & SFLAG_CLIENT)
1561             surface_release_client_storage(surface);
1562
1563         surface_prepare_system_memory(surface);
1564         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1565     }
1566
1567     return WINED3D_OK;
1568 }
1569
1570 /* Context activation is done by the caller. */
1571 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1572 {
1573     if (!surface->resource.heapMemory)
1574     {
1575         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1576         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1577                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1578     }
1579
1580     ENTER_GL();
1581     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1582     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1583     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1584             surface->resource.size, surface->resource.allocatedMemory));
1585     checkGLcall("glGetBufferSubDataARB");
1586     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1587     checkGLcall("glDeleteBuffersARB");
1588     LEAVE_GL();
1589
1590     surface->pbo = 0;
1591     surface->flags &= ~SFLAG_PBO;
1592 }
1593
1594 /* Do not call while under the GL lock. */
1595 static void surface_unload(struct wined3d_resource *resource)
1596 {
1597     struct wined3d_surface *surface = surface_from_resource(resource);
1598     struct wined3d_renderbuffer_entry *entry, *entry2;
1599     struct wined3d_device *device = resource->device;
1600     const struct wined3d_gl_info *gl_info;
1601     struct wined3d_context *context;
1602
1603     TRACE("surface %p.\n", surface);
1604
1605     if (resource->pool == WINED3DPOOL_DEFAULT)
1606     {
1607         /* Default pool resources are supposed to be destroyed before Reset is called.
1608          * Implicit resources stay however. So this means we have an implicit render target
1609          * or depth stencil. The content may be destroyed, but we still have to tear down
1610          * opengl resources, so we cannot leave early.
1611          *
1612          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1613          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1614          * or the depth stencil into an FBO the texture or render buffer will be removed
1615          * and all flags get lost
1616          */
1617         surface_init_sysmem(surface);
1618     }
1619     else
1620     {
1621         /* Load the surface into system memory */
1622         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1623         surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
1624     }
1625     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1626     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1627     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1628
1629     context = context_acquire(device, NULL);
1630     gl_info = context->gl_info;
1631
1632     /* Destroy PBOs, but load them into real sysmem before */
1633     if (surface->flags & SFLAG_PBO)
1634         surface_remove_pbo(surface, gl_info);
1635
1636     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1637      * all application-created targets the application has to release the surface
1638      * before calling _Reset
1639      */
1640     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1641     {
1642         ENTER_GL();
1643         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1644         LEAVE_GL();
1645         list_remove(&entry->entry);
1646         HeapFree(GetProcessHeap(), 0, entry);
1647     }
1648     list_init(&surface->renderbuffers);
1649     surface->current_renderbuffer = NULL;
1650
1651     /* If we're in a texture, the texture name belongs to the texture.
1652      * Otherwise, destroy it. */
1653     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1654     {
1655         ENTER_GL();
1656         glDeleteTextures(1, &surface->texture_name);
1657         surface->texture_name = 0;
1658         glDeleteTextures(1, &surface->texture_name_srgb);
1659         surface->texture_name_srgb = 0;
1660         LEAVE_GL();
1661     }
1662
1663     context_release(context);
1664
1665     resource_unload(resource);
1666 }
1667
/* Resource callback table for surfaces. The initializer is positional, so
 * the single entry fills the first member of struct wined3d_resource_ops
 * (declared outside this file section) with surface_unload(). */
1668 static const struct wined3d_resource_ops surface_resource_ops =
1669 {
1670     surface_unload,
1671 };
1672
/* Surface callback table listing the non-GDI surface_*() implementations.
 * The initializer is positional: the entries must stay in the member order
 * of struct wined3d_surface_ops (declared outside this file section).
 * gdi_surface_ops lists the GDI counterparts in the same order. */
1673 static const struct wined3d_surface_ops surface_ops =
1674 {
1675     surface_private_setup,
1676     surface_cleanup,
1677     surface_realize_palette,
1678     surface_draw_overlay,
1679     surface_preload,
1680     surface_map,
1681     surface_unmap,
1682     surface_getdc,
1683     surface_flip,
1684     surface_set_mem,
1685 };
1686
1687 /*****************************************************************************
1688  * Initializes the GDI surface, aka creates the DIB section we render to
1689  * The DIB section creation is done by calling GetDC, which will create the
1690  * section and releasing the dc to allow the app to use it. The dib section
1691  * will stay until the surface is released
1692  *
1693  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1694  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1695  * avoid confusion in the shared surface code.
1696  *
1697  * Returns:
1698  *  WINED3D_OK on success
1699  *  The return values of called methods on failure
1700  *
1701  *****************************************************************************/
1702 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1703 {
1704     HRESULT hr;
1705
1706     TRACE("surface %p.\n", surface);
1707
1708     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1709     {
1710         ERR("Overlays not yet supported by GDI surfaces.\n");
1711         return WINED3DERR_INVALIDCALL;
1712     }
1713
1714     /* Sysmem textures have memory already allocated - release it,
1715      * this avoids an unnecessary memcpy. */
1716     hr = surface_create_dib_section(surface);
1717     if (SUCCEEDED(hr))
1718     {
1719         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1720         surface->resource.heapMemory = NULL;
1721         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1722     }
1723
1724     /* We don't mind the nonpow2 stuff in GDI. */
1725     surface->pow2Width = surface->resource.width;
1726     surface->pow2Height = surface->resource.height;
1727
1728     return WINED3D_OK;
1729 }
1730
1731 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1732 {
1733     TRACE("surface %p.\n", surface);
1734
1735     if (surface->flags & SFLAG_DIBSECTION)
1736     {
1737         /* Release the DC. */
1738         SelectObject(surface->hDC, surface->dib.holdbitmap);
1739         DeleteDC(surface->hDC);
1740         /* Release the DIB section. */
1741         DeleteObject(surface->dib.DIBsection);
1742         surface->dib.bitmap_data = NULL;
1743         surface->resource.allocatedMemory = NULL;
1744     }
1745
1746     if (surface->flags & SFLAG_USERPTR)
1747         wined3d_surface_set_mem(surface, NULL);
1748     if (surface->overlay_dest)
1749         list_remove(&surface->overlay_entry);
1750
1751     HeapFree(GetProcessHeap(), 0, surface->palette9);
1752
1753     resource_cleanup(&surface->resource);
1754 }
1755
1756 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1757 {
1758     struct wined3d_palette *palette = surface->palette;
1759
1760     TRACE("surface %p.\n", surface);
1761
1762     if (!palette) return;
1763
1764     if (surface->flags & SFLAG_DIBSECTION)
1765     {
1766         RGBQUAD col[256];
1767         unsigned int i;
1768
1769         TRACE("Updating the DC's palette.\n");
1770
1771         for (i = 0; i < 256; ++i)
1772         {
1773             col[i].rgbRed = palette->palents[i].peRed;
1774             col[i].rgbGreen = palette->palents[i].peGreen;
1775             col[i].rgbBlue = palette->palents[i].peBlue;
1776             col[i].rgbReserved = 0;
1777         }
1778         SetDIBColorTable(surface->hDC, 0, 256, col);
1779     }
1780
1781     /* Update the image because of the palette change. Some games like e.g.
1782      * Red Alert call SetEntries a lot to implement fading. */
1783     /* Tell the swapchain to update the screen. */
1784     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1785     {
1786         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1787         if (surface == swapchain->front_buffer)
1788         {
1789             x11_copy_to_screen(swapchain, NULL);
1790         }
1791     }
1792 }
1793
1794 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
1795 {
1796     FIXME("GDI surfaces can't draw overlays yet.\n");
1797     return E_FAIL;
1798 }
1799
/* Preloading is not supported for GDI surfaces; the call is only traced and
 * reported as an error. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);

    ERR("Preloading GDI surfaces is not supported.\n");
}
1806
1807 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1808 {
1809     TRACE("surface %p, rect %s, flags %#x.\n",
1810             surface, wine_dbgstr_rect(rect), flags);
1811
1812     if (!surface->resource.allocatedMemory)
1813     {
1814         /* This happens on gdi surfaces if the application set a user pointer
1815          * and resets it. Recreate the DIB section. */
1816         surface_create_dib_section(surface);
1817         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1818     }
1819 }
1820
1821 static void gdi_surface_unmap(struct wined3d_surface *surface)
1822 {
1823     TRACE("surface %p.\n", surface);
1824
1825     /* Tell the swapchain to update the screen. */
1826     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1827     {
1828         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1829         if (surface == swapchain->front_buffer)
1830         {
1831             x11_copy_to_screen(swapchain, &surface->lockedRect);
1832         }
1833     }
1834
1835     memset(&surface->lockedRect, 0, sizeof(RECT));
1836 }
1837
1838 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
1839 {
1840     WINED3DLOCKED_RECT lock;
1841     HRESULT hr;
1842
1843     TRACE("surface %p.\n", surface);
1844
1845     /* Should have a DIB section already. */
1846     if (!(surface->flags & SFLAG_DIBSECTION))
1847     {
1848         WARN("DC not supported on this surface\n");
1849         return WINED3DERR_INVALIDCALL;
1850     }
1851
1852     /* Map the surface. */
1853     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1854     if (FAILED(hr))
1855         ERR("Map failed, hr %#x.\n", hr);
1856
1857     return hr;
1858 }
1859
1860 static HRESULT gdi_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1861 {
1862     TRACE("surface %p, override %p.\n", surface, override);
1863
1864     return WINED3D_OK;
1865 }
1866
1867 static HRESULT gdi_surface_set_mem(struct wined3d_surface *surface, void *mem)
1868 {
1869     TRACE("surface %p, mem %p.\n", surface, mem);
1870
1871     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
1872     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1873     {
1874         ERR("Not supported on render targets.\n");
1875         return WINED3DERR_INVALIDCALL;
1876     }
1877
1878     if (mem && mem != surface->resource.allocatedMemory)
1879     {
1880         void *release = NULL;
1881
1882         /* Do I have to copy the old surface content? */
1883         if (surface->flags & SFLAG_DIBSECTION)
1884         {
1885             SelectObject(surface->hDC, surface->dib.holdbitmap);
1886             DeleteDC(surface->hDC);
1887             /* Release the DIB section. */
1888             DeleteObject(surface->dib.DIBsection);
1889             surface->dib.bitmap_data = NULL;
1890             surface->resource.allocatedMemory = NULL;
1891             surface->hDC = NULL;
1892             surface->flags &= ~SFLAG_DIBSECTION;
1893         }
1894         else if (!(surface->flags & SFLAG_USERPTR))
1895         {
1896             release = surface->resource.allocatedMemory;
1897         }
1898         surface->resource.allocatedMemory = mem;
1899         surface->flags |= SFLAG_USERPTR | SFLAG_INSYSMEM;
1900
1901         /* Now free the old memory, if any. */
1902         HeapFree(GetProcessHeap(), 0, release);
1903     }
1904     else if (surface->flags & SFLAG_USERPTR)
1905     {
1906         /* Map() and GetDC() will re-create the dib section and allocated memory. */
1907         surface->resource.allocatedMemory = NULL;
1908         surface->flags &= ~SFLAG_USERPTR;
1909     }
1910
1911     return WINED3D_OK;
1912 }
1913
/* Surface callback table listing the GDI surface implementations. The
 * initializer is positional: the entries must stay in the member order of
 * struct wined3d_surface_ops (declared outside this file section), which is
 * the same order surface_ops uses. */
1914 static const struct wined3d_surface_ops gdi_surface_ops =
1915 {
1916     gdi_surface_private_setup,
1917     surface_gdi_cleanup,
1918     gdi_surface_realize_palette,
1919     gdi_surface_draw_overlay,
1920     gdi_surface_preload,
1921     gdi_surface_map,
1922     gdi_surface_unmap,
1923     gdi_surface_getdc,
1924     gdi_surface_flip,
1925     gdi_surface_set_mem,
1926 };
1927
1928 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
1929 {
1930     GLuint *name;
1931     DWORD flag;
1932
1933     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
1934
1935     if(srgb)
1936     {
1937         name = &surface->texture_name_srgb;
1938         flag = SFLAG_INSRGBTEX;
1939     }
1940     else
1941     {
1942         name = &surface->texture_name;
1943         flag = SFLAG_INTEXTURE;
1944     }
1945
1946     if (!*name && new_name)
1947     {
1948         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
1949          * surface has no texture name yet. See if we can get rid of this. */
1950         if (surface->flags & flag)
1951             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
1952         surface_modify_location(surface, flag, FALSE);
1953     }
1954
1955     *name = new_name;
1956     surface_force_reload(surface);
1957 }
1958
1959 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
1960 {
1961     TRACE("surface %p, target %#x.\n", surface, target);
1962
1963     if (surface->texture_target != target)
1964     {
1965         if (target == GL_TEXTURE_RECTANGLE_ARB)
1966         {
1967             surface->flags &= ~SFLAG_NORMCOORD;
1968         }
1969         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
1970         {
1971             surface->flags |= SFLAG_NORMCOORD;
1972         }
1973     }
1974     surface->texture_target = target;
1975     surface_force_reload(surface);
1976 }
1977
1978 /* Context activation is done by the caller. */
1979 void surface_bind(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
1980 {
1981     TRACE("surface %p, gl_info %p, srgb %#x.\n", surface, gl_info, srgb);
1982
1983     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
1984     {
1985         struct wined3d_texture *texture = surface->container.u.texture;
1986
1987         TRACE("Passing to container (%p).\n", texture);
1988         texture->texture_ops->texture_bind(texture, gl_info, srgb);
1989     }
1990     else
1991     {
1992         if (surface->texture_level)
1993         {
1994             ERR("Standalone surface %p is non-zero texture level %u.\n",
1995                     surface, surface->texture_level);
1996         }
1997
1998         if (srgb)
1999             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2000
2001         ENTER_GL();
2002
2003         if (!surface->texture_name)
2004         {
2005             glGenTextures(1, &surface->texture_name);
2006             checkGLcall("glGenTextures");
2007
2008             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2009
2010             glBindTexture(surface->texture_target, surface->texture_name);
2011             checkGLcall("glBindTexture");
2012             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2013             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2014             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2015             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2016             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2017             checkGLcall("glTexParameteri");
2018         }
2019         else
2020         {
2021             glBindTexture(surface->texture_target, surface->texture_name);
2022             checkGLcall("glBindTexture");
2023         }
2024
2025         LEAVE_GL();
2026     }
2027 }
2028
2029 /* This function checks if the primary render target uses the 8bit paletted format. */
2030 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
2031 {
2032     if (device->fb.render_targets && device->fb.render_targets[0])
2033     {
2034         const struct wined3d_surface *render_target = device->fb.render_targets[0];
2035         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
2036                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
2037             return TRUE;
2038     }
2039     return FALSE;
2040 }
2041
2042 /* This call just downloads data, the caller is responsible for binding the
2043  * correct texture. */
2044 /* Context activation is done by the caller. */
2045 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2046 {
2047     const struct wined3d_format *format = surface->resource.format;
2048
2049     /* Only support read back of converted P8 surfaces. */
2050     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2051     {
2052         FIXME("Readback conversion not supported for format %s.\n", debug_d3dformat(format->id));
2053         return;
2054     }
2055
2056     ENTER_GL();
2057
2058     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2059     {
2060         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2061                 surface, surface->texture_level, format->glFormat, format->glType,
2062                 surface->resource.allocatedMemory);
2063
2064         if (surface->flags & SFLAG_PBO)
2065         {
2066             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2067             checkGLcall("glBindBufferARB");
2068             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2069             checkGLcall("glGetCompressedTexImageARB");
2070             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2071             checkGLcall("glBindBufferARB");
2072         }
2073         else
2074         {
2075             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2076                     surface->texture_level, surface->resource.allocatedMemory));
2077             checkGLcall("glGetCompressedTexImageARB");
2078         }
2079
2080         LEAVE_GL();
2081     }
2082     else
2083     {
2084         void *mem;
2085         GLenum gl_format = format->glFormat;
2086         GLenum gl_type = format->glType;
2087         int src_pitch = 0;
2088         int dst_pitch = 0;
2089
2090         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2091         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2092         {
2093             gl_format = GL_ALPHA;
2094             gl_type = GL_UNSIGNED_BYTE;
2095         }
2096
2097         if (surface->flags & SFLAG_NONPOW2)
2098         {
2099             unsigned char alignment = surface->resource.device->surface_alignment;
2100             src_pitch = format->byte_count * surface->pow2Width;
2101             dst_pitch = wined3d_surface_get_pitch(surface);
2102             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2103             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2104         }
2105         else
2106         {
2107             mem = surface->resource.allocatedMemory;
2108         }
2109
2110         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2111                 surface, surface->texture_level, gl_format, gl_type, mem);
2112
2113         if (surface->flags & SFLAG_PBO)
2114         {
2115             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2116             checkGLcall("glBindBufferARB");
2117
2118             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2119             checkGLcall("glGetTexImage");
2120
2121             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2122             checkGLcall("glBindBufferARB");
2123         }
2124         else
2125         {
2126             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2127             checkGLcall("glGetTexImage");
2128         }
2129         LEAVE_GL();
2130
2131         if (surface->flags & SFLAG_NONPOW2)
2132         {
2133             const BYTE *src_data;
2134             BYTE *dst_data;
2135             UINT y;
2136             /*
2137              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2138              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2139              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2140              *
2141              * We're doing this...
2142              *
2143              * instead of boxing the texture :
2144              * |<-texture width ->|  -->pow2width|   /\
2145              * |111111111111111111|              |   |
2146              * |222 Texture 222222| boxed empty  | texture height
2147              * |3333 Data 33333333|              |   |
2148              * |444444444444444444|              |   \/
2149              * -----------------------------------   |
2150              * |     boxed  empty | boxed empty  | pow2height
2151              * |                  |              |   \/
2152              * -----------------------------------
2153              *
2154              *
2155              * we're repacking the data to the expected texture width
2156              *
2157              * |<-texture width ->|  -->pow2width|   /\
2158              * |111111111111111111222222222222222|   |
2159              * |222333333333333333333444444444444| texture height
2160              * |444444                           |   |
2161              * |                                 |   \/
2162              * |                                 |   |
2163              * |            empty                | pow2height
2164              * |                                 |   \/
2165              * -----------------------------------
2166              *
2167              * == is the same as
2168              *
2169              * |<-texture width ->|    /\
2170              * |111111111111111111|
2171              * |222222222222222222|texture height
2172              * |333333333333333333|
2173              * |444444444444444444|    \/
2174              * --------------------
2175              *
2176              * this also means that any references to allocatedMemory should work with the data as if were a
2177              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2178              *
2179              * internally the texture is still stored in a boxed format so any references to textureName will
2180              * get a boxed texture with width pow2width and not a texture of width resource.width.
2181              *
2182              * Performance should not be an issue, because applications normally do not lock the surfaces when
2183              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2184              * and doesn't have to be re-read. */
2185             src_data = mem;
2186             dst_data = surface->resource.allocatedMemory;
2187             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2188             for (y = 1; y < surface->resource.height; ++y)
2189             {
2190                 /* skip the first row */
2191                 src_data += src_pitch;
2192                 dst_data += dst_pitch;
2193                 memcpy(dst_data, src_data, dst_pitch);
2194             }
2195
2196             HeapFree(GetProcessHeap(), 0, mem);
2197         }
2198     }
2199
2200     /* Surface has now been downloaded */
2201     surface->flags |= SFLAG_INSYSMEM;
2202 }
2203
2204 /* This call just uploads data, the caller is responsible for binding the
2205  * correct texture. */
2206 /* Context activation is done by the caller. */
2207 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2208         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2209         BOOL srgb, const struct wined3d_bo_address *data)
2210 {
2211     UINT update_w = src_rect->right - src_rect->left;
2212     UINT update_h = src_rect->bottom - src_rect->top;
2213
2214     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2215             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2216             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2217
2218     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2219         update_h *= format->heightscale;
2220
2221     ENTER_GL();
2222
2223     if (data->buffer_object)
2224     {
2225         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2226         checkGLcall("glBindBufferARB");
2227     }
2228
2229     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2230     {
2231         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2232         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2233         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2234         const BYTE *addr = data->addr;
2235         GLenum internal;
2236
2237         addr += (src_rect->top / format->block_height) * src_pitch;
2238         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2239
2240         if (srgb)
2241             internal = format->glGammaInternal;
2242         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2243             internal = format->rtInternal;
2244         else
2245             internal = format->glInternal;
2246
2247         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2248                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2249                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2250
2251         if (row_length == src_pitch)
2252         {
2253             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2254                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2255         }
2256         else
2257         {
2258             UINT row, y;
2259
2260             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2261              * can't use the unpack row length like below. */
2262             for (row = 0, y = dst_point->y; row < row_count; ++row)
2263             {
2264                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2265                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2266                 y += format->block_height;
2267                 addr += src_pitch;
2268             }
2269         }
2270         checkGLcall("glCompressedTexSubImage2DARB");
2271     }
2272     else
2273     {
2274         const BYTE *addr = data->addr;
2275
2276         addr += src_rect->top * src_w * format->byte_count;
2277         addr += src_rect->left * format->byte_count;
2278
2279         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2280                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2281                 update_w, update_h, format->glFormat, format->glType, addr);
2282
2283         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2284         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2285                 update_w, update_h, format->glFormat, format->glType, addr);
2286         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2287         checkGLcall("glTexSubImage2D");
2288     }
2289
2290     if (data->buffer_object)
2291     {
2292         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2293         checkGLcall("glBindBufferARB");
2294     }
2295
2296     LEAVE_GL();
2297
2298     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2299     {
2300         struct wined3d_device *device = surface->resource.device;
2301         unsigned int i;
2302
2303         for (i = 0; i < device->context_count; ++i)
2304         {
2305             context_surface_update(device->contexts[i], surface);
2306         }
2307     }
2308 }
2309
2310 /* This call just allocates the texture, the caller is responsible for binding
2311  * the correct texture. */
2312 /* Context activation is done by the caller. */
2313 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2314         const struct wined3d_format *format, BOOL srgb)
2315 {
2316     BOOL enable_client_storage = FALSE;
2317     GLsizei width = surface->pow2Width;
2318     GLsizei height = surface->pow2Height;
2319     const BYTE *mem = NULL;
2320     GLenum internal;
2321
2322     if (srgb)
2323     {
2324         internal = format->glGammaInternal;
2325     }
2326     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2327     {
2328         internal = format->rtInternal;
2329     }
2330     else
2331     {
2332         internal = format->glInternal;
2333     }
2334
2335     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2336
2337     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2338             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2339             internal, width, height, format->glFormat, format->glType);
2340
2341     ENTER_GL();
2342
2343     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2344     {
2345         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2346                 || !surface->resource.allocatedMemory)
2347         {
2348             /* In some cases we want to disable client storage.
2349              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2350              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2351              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2352              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2353              */
2354             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2355             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2356             surface->flags &= ~SFLAG_CLIENT;
2357             enable_client_storage = TRUE;
2358         }
2359         else
2360         {
2361             surface->flags |= SFLAG_CLIENT;
2362
2363             /* Point OpenGL to our allocated texture memory. Do not use
2364              * resource.allocatedMemory here because it might point into a
2365              * PBO. Instead use heapMemory, but get the alignment right. */
2366             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2367                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2368         }
2369     }
2370
2371     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2372     {
2373         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2374                 internal, width, height, 0, surface->resource.size, mem));
2375         checkGLcall("glCompressedTexImage2DARB");
2376     }
2377     else
2378     {
2379         glTexImage2D(surface->texture_target, surface->texture_level,
2380                 internal, width, height, 0, format->glFormat, format->glType, mem);
2381         checkGLcall("glTexImage2D");
2382     }
2383
2384     if(enable_client_storage) {
2385         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2386         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2387     }
2388     LEAVE_GL();
2389 }
2390
2391 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2392  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2393 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2394 /* GL locking is done by the caller */
2395 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2396 {
2397     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2398     struct wined3d_renderbuffer_entry *entry;
2399     GLuint renderbuffer = 0;
2400     unsigned int src_width, src_height;
2401     unsigned int width, height;
2402
2403     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2404     {
2405         width = rt->pow2Width;
2406         height = rt->pow2Height;
2407     }
2408     else
2409     {
2410         width = surface->pow2Width;
2411         height = surface->pow2Height;
2412     }
2413
2414     src_width = surface->pow2Width;
2415     src_height = surface->pow2Height;
2416
2417     /* A depth stencil smaller than the render target is not valid */
2418     if (width > src_width || height > src_height) return;
2419
2420     /* Remove any renderbuffer set if the sizes match */
2421     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2422             || (width == src_width && height == src_height))
2423     {
2424         surface->current_renderbuffer = NULL;
2425         return;
2426     }
2427
2428     /* Look if we've already got a renderbuffer of the correct dimensions */
2429     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2430     {
2431         if (entry->width == width && entry->height == height)
2432         {
2433             renderbuffer = entry->id;
2434             surface->current_renderbuffer = entry;
2435             break;
2436         }
2437     }
2438
2439     if (!renderbuffer)
2440     {
2441         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2442         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2443         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2444                 surface->resource.format->glInternal, width, height);
2445
2446         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2447         entry->width = width;
2448         entry->height = height;
2449         entry->id = renderbuffer;
2450         list_add_head(&surface->renderbuffers, &entry->entry);
2451
2452         surface->current_renderbuffer = entry;
2453     }
2454
2455     checkGLcall("set_compatible_renderbuffer");
2456 }
2457
2458 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2459 {
2460     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2461
2462     TRACE("surface %p.\n", surface);
2463
2464     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2465     {
2466         ERR("Surface %p is not on a swapchain.\n", surface);
2467         return GL_NONE;
2468     }
2469
2470     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2471     {
2472         if (swapchain->render_to_fbo)
2473         {
2474             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2475             return GL_COLOR_ATTACHMENT0;
2476         }
2477         TRACE("Returning GL_BACK\n");
2478         return GL_BACK;
2479     }
2480     else if (surface == swapchain->front_buffer)
2481     {
2482         TRACE("Returning GL_FRONT\n");
2483         return GL_FRONT;
2484     }
2485
2486     FIXME("Higher back buffer, returning GL_BACK\n");
2487     return GL_BACK;
2488 }
2489
2490 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2491 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2492 {
2493     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2494
2495     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2496         /* No partial locking for textures yet. */
2497         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2498
2499     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2500     if (dirty_rect)
2501     {
2502         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2503         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2504         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2505         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2506     }
2507     else
2508     {
2509         surface->dirtyRect.left = 0;
2510         surface->dirtyRect.top = 0;
2511         surface->dirtyRect.right = surface->resource.width;
2512         surface->dirtyRect.bottom = surface->resource.height;
2513     }
2514
2515     /* if the container is a texture then mark it dirty. */
2516     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2517     {
2518         TRACE("Passing to container.\n");
2519         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2520     }
2521 }
2522
2523 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
2524         DWORD color, WINED3DCOLORVALUE *float_color)
2525 {
2526     const struct wined3d_format *format = surface->resource.format;
2527     const struct wined3d_device *device = surface->resource.device;
2528
2529     switch (format->id)
2530     {
2531         case WINED3DFMT_P8_UINT:
2532             if (surface->palette)
2533             {
2534                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
2535                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
2536                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
2537             }
2538             else
2539             {
2540                 float_color->r = 0.0f;
2541                 float_color->g = 0.0f;
2542                 float_color->b = 0.0f;
2543             }
2544             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
2545             break;
2546
2547         case WINED3DFMT_B5G6R5_UNORM:
2548             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
2549             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
2550             float_color->b = (color & 0x1f) / 31.0f;
2551             float_color->a = 1.0f;
2552             break;
2553
2554         case WINED3DFMT_B8G8R8_UNORM:
2555         case WINED3DFMT_B8G8R8X8_UNORM:
2556             float_color->r = D3DCOLOR_R(color);
2557             float_color->g = D3DCOLOR_G(color);
2558             float_color->b = D3DCOLOR_B(color);
2559             float_color->a = 1.0f;
2560             break;
2561
2562         case WINED3DFMT_B8G8R8A8_UNORM:
2563             float_color->r = D3DCOLOR_R(color);
2564             float_color->g = D3DCOLOR_G(color);
2565             float_color->b = D3DCOLOR_B(color);
2566             float_color->a = D3DCOLOR_A(color);
2567             break;
2568
2569         default:
2570             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
2571             return FALSE;
2572     }
2573
2574     return TRUE;
2575 }
2576
2577 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2578 {
2579     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2580     BOOL ck_changed;
2581
2582     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2583
2584     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2585     {
2586         ERR("Not supported on scratch surfaces.\n");
2587         return WINED3DERR_INVALIDCALL;
2588     }
2589
2590     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2591
2592     /* Reload if either the texture and sysmem have different ideas about the
2593      * color key, or the actual key values changed. */
2594     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2595             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2596             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2597     {
2598         TRACE("Reloading because of color keying\n");
2599         /* To perform the color key conversion we need a sysmem copy of
2600          * the surface. Make sure we have it. */
2601
2602         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2603         /* Make sure the texture is reloaded because of the color key change,
2604          * this kills performance though :( */
2605         /* TODO: This is not necessarily needed with hw palettized texture support. */
2606         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2607         /* Switching color keying on / off may change the internal format. */
2608         if (ck_changed)
2609             surface_force_reload(surface);
2610     }
2611     else if (!(surface->flags & flag))
2612     {
2613         TRACE("Reloading because surface is dirty.\n");
2614     }
2615     else
2616     {
2617         TRACE("surface is already in texture\n");
2618         return WINED3D_OK;
2619     }
2620
2621     /* No partial locking for textures yet. */
2622     surface_load_location(surface, flag, NULL);
2623     surface_evict_sysmem(surface);
2624
2625     return WINED3D_OK;
2626 }
2627
/* See also float_16_to_32() in wined3d_private.h */
/* Convert a 32 bit float to the 16 bit half float encoding (1 sign bit,
 * 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * Zero (of either sign) is returned as 0x0000, NaN as 0x7c01, and infinities
 * as 0x7c00 / 0xfc00. Values too large for a half float become infinity,
 * values too small are flushed towards zero through the denormal path. */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), so that its integer part is the
     * 11 bit mantissa including the implicit leading one. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 mantissa bits (2047.5 -> 2048).
     * Renormalize, otherwise the "& 0x3ff" below silently drops the carry
     * and the result is off by a factor of two. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2690
2691 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2692 {
2693     ULONG refcount;
2694
2695     TRACE("Surface %p, container %p of type %#x.\n",
2696             surface, surface->container.u.base, surface->container.type);
2697
2698     switch (surface->container.type)
2699     {
2700         case WINED3D_CONTAINER_TEXTURE:
2701             return wined3d_texture_incref(surface->container.u.texture);
2702
2703         case WINED3D_CONTAINER_SWAPCHAIN:
2704             return wined3d_swapchain_incref(surface->container.u.swapchain);
2705
2706         default:
2707             ERR("Unhandled container type %#x.\n", surface->container.type);
2708         case WINED3D_CONTAINER_NONE:
2709             break;
2710     }
2711
2712     refcount = InterlockedIncrement(&surface->resource.ref);
2713     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2714
2715     return refcount;
2716 }
2717
2718 /* Do not call while under the GL lock. */
2719 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2720 {
2721     ULONG refcount;
2722
2723     TRACE("Surface %p, container %p of type %#x.\n",
2724             surface, surface->container.u.base, surface->container.type);
2725
2726     switch (surface->container.type)
2727     {
2728         case WINED3D_CONTAINER_TEXTURE:
2729             return wined3d_texture_decref(surface->container.u.texture);
2730
2731         case WINED3D_CONTAINER_SWAPCHAIN:
2732             return wined3d_swapchain_decref(surface->container.u.swapchain);
2733
2734         default:
2735             ERR("Unhandled container type %#x.\n", surface->container.type);
2736         case WINED3D_CONTAINER_NONE:
2737             break;
2738     }
2739
2740     refcount = InterlockedDecrement(&surface->resource.ref);
2741     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2742
2743     if (!refcount)
2744     {
2745         surface->surface_ops->surface_cleanup(surface);
2746         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2747
2748         TRACE("Destroyed surface %p.\n", surface);
2749         HeapFree(GetProcessHeap(), 0, surface);
2750     }
2751
2752     return refcount;
2753 }
2754
2755 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2756 {
2757     return resource_set_priority(&surface->resource, priority);
2758 }
2759
2760 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2761 {
2762     return resource_get_priority(&surface->resource);
2763 }
2764
2765 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2766 {
2767     TRACE("surface %p.\n", surface);
2768
2769     surface->surface_ops->surface_preload(surface);
2770 }
2771
2772 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2773 {
2774     TRACE("surface %p.\n", surface);
2775
2776     return surface->resource.parent;
2777 }
2778
2779 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2780 {
2781     TRACE("surface %p.\n", surface);
2782
2783     return &surface->resource;
2784 }
2785
2786 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2787 {
2788     TRACE("surface %p, flags %#x.\n", surface, flags);
2789
2790     switch (flags)
2791     {
2792         case WINEDDGBS_CANBLT:
2793         case WINEDDGBS_ISBLTDONE:
2794             return WINED3D_OK;
2795
2796         default:
2797             return WINED3DERR_INVALIDCALL;
2798     }
2799 }
2800
2801 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2802 {
2803     TRACE("surface %p, flags %#x.\n", surface, flags);
2804
2805     /* XXX: DDERR_INVALIDSURFACETYPE */
2806
2807     switch (flags)
2808     {
2809         case WINEDDGFS_CANFLIP:
2810         case WINEDDGFS_ISFLIPDONE:
2811             return WINED3D_OK;
2812
2813         default:
2814             return WINED3DERR_INVALIDCALL;
2815     }
2816 }
2817
2818 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2819 {
2820     TRACE("surface %p.\n", surface);
2821
2822     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2823     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2824 }
2825
2826 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2827 {
2828     TRACE("surface %p.\n", surface);
2829
2830     /* So far we don't lose anything :) */
2831     surface->flags &= ~SFLAG_LOST;
2832     return WINED3D_OK;
2833 }
2834
2835 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2836 {
2837     TRACE("surface %p, palette %p.\n", surface, palette);
2838
2839     if (surface->palette == palette)
2840     {
2841         TRACE("Nop palette change.\n");
2842         return WINED3D_OK;
2843     }
2844
2845     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2846         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2847
2848     surface->palette = palette;
2849
2850     if (palette)
2851     {
2852         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2853             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2854
2855         surface->surface_ops->surface_realize_palette(surface);
2856     }
2857
2858     return WINED3D_OK;
2859 }
2860
2861 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2862         DWORD flags, const WINEDDCOLORKEY *color_key)
2863 {
2864     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
2865
2866     if (flags & WINEDDCKEY_COLORSPACE)
2867     {
2868         FIXME(" colorkey value not supported (%08x) !\n", flags);
2869         return WINED3DERR_INVALIDCALL;
2870     }
2871
2872     /* Dirtify the surface, but only if a key was changed. */
2873     if (color_key)
2874     {
2875         switch (flags & ~WINEDDCKEY_COLORSPACE)
2876         {
2877             case WINEDDCKEY_DESTBLT:
2878                 surface->DestBltCKey = *color_key;
2879                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
2880                 break;
2881
2882             case WINEDDCKEY_DESTOVERLAY:
2883                 surface->DestOverlayCKey = *color_key;
2884                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
2885                 break;
2886
2887             case WINEDDCKEY_SRCOVERLAY:
2888                 surface->SrcOverlayCKey = *color_key;
2889                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
2890                 break;
2891
2892             case WINEDDCKEY_SRCBLT:
2893                 surface->SrcBltCKey = *color_key;
2894                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
2895                 break;
2896         }
2897     }
2898     else
2899     {
2900         switch (flags & ~WINEDDCKEY_COLORSPACE)
2901         {
2902             case WINEDDCKEY_DESTBLT:
2903                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
2904                 break;
2905
2906             case WINEDDCKEY_DESTOVERLAY:
2907                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
2908                 break;
2909
2910             case WINEDDCKEY_SRCOVERLAY:
2911                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
2912                 break;
2913
2914             case WINEDDCKEY_SRCBLT:
2915                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
2916                 break;
2917         }
2918     }
2919
2920     return WINED3D_OK;
2921 }
2922
2923 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
2924 {
2925     TRACE("surface %p.\n", surface);
2926
2927     return surface->palette;
2928 }
2929
2930 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
2931 {
2932     const struct wined3d_format *format = surface->resource.format;
2933     DWORD pitch;
2934
2935     TRACE("surface %p.\n", surface);
2936
2937     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
2938     {
2939         /* Since compressed formats are block based, pitch means the amount of
2940          * bytes to the next row of block rather than the next row of pixels. */
2941         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
2942         pitch = row_block_count * format->block_byte_count;
2943     }
2944     else
2945     {
2946         unsigned char alignment = surface->resource.device->surface_alignment;
2947         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
2948         pitch = (pitch + alignment - 1) & ~(alignment - 1);
2949     }
2950
2951     TRACE("Returning %u.\n", pitch);
2952
2953     return pitch;
2954 }
2955
2956 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
2957 {
2958     TRACE("surface %p, mem %p.\n", surface, mem);
2959
2960     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
2961     {
2962         WARN("Surface is locked or the DC is in use.\n");
2963         return WINED3DERR_INVALIDCALL;
2964     }
2965
2966     return surface->surface_ops->surface_set_mem(surface, mem);
2967 }
2968
2969 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
2970 {
2971     LONG w, h;
2972
2973     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
2974
2975     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2976     {
2977         WARN("Not an overlay surface.\n");
2978         return WINEDDERR_NOTAOVERLAYSURFACE;
2979     }
2980
2981     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
2982     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
2983     surface->overlay_destrect.left = x;
2984     surface->overlay_destrect.top = y;
2985     surface->overlay_destrect.right = x + w;
2986     surface->overlay_destrect.bottom = y + h;
2987
2988     surface->surface_ops->surface_draw_overlay(surface);
2989
2990     return WINED3D_OK;
2991 }
2992
2993 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
2994 {
2995     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
2996
2997     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2998     {
2999         TRACE("Not an overlay surface.\n");
3000         return WINEDDERR_NOTAOVERLAYSURFACE;
3001     }
3002
3003     if (!surface->overlay_dest)
3004     {
3005         TRACE("Overlay not visible.\n");
3006         *x = 0;
3007         *y = 0;
3008         return WINEDDERR_OVERLAYNOTVISIBLE;
3009     }
3010
3011     *x = surface->overlay_destrect.left;
3012     *y = surface->overlay_destrect.top;
3013
3014     TRACE("Returning position %d, %d.\n", *x, *y);
3015
3016     return WINED3D_OK;
3017 }
3018
3019 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3020         DWORD flags, struct wined3d_surface *ref)
3021 {
3022     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3023
3024     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3025     {
3026         TRACE("Not an overlay surface.\n");
3027         return WINEDDERR_NOTAOVERLAYSURFACE;
3028     }
3029
3030     return WINED3D_OK;
3031 }
3032
3033 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3034         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3035 {
3036     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3037             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3038
3039     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3040     {
3041         WARN("Not an overlay surface.\n");
3042         return WINEDDERR_NOTAOVERLAYSURFACE;
3043     }
3044     else if (!dst_surface)
3045     {
3046         WARN("Dest surface is NULL.\n");
3047         return WINED3DERR_INVALIDCALL;
3048     }
3049
3050     if (src_rect)
3051     {
3052         surface->overlay_srcrect = *src_rect;
3053     }
3054     else
3055     {
3056         surface->overlay_srcrect.left = 0;
3057         surface->overlay_srcrect.top = 0;
3058         surface->overlay_srcrect.right = surface->resource.width;
3059         surface->overlay_srcrect.bottom = surface->resource.height;
3060     }
3061
3062     if (dst_rect)
3063     {
3064         surface->overlay_destrect = *dst_rect;
3065     }
3066     else
3067     {
3068         surface->overlay_destrect.left = 0;
3069         surface->overlay_destrect.top = 0;
3070         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3071         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3072     }
3073
3074     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3075     {
3076         list_remove(&surface->overlay_entry);
3077     }
3078
3079     if (flags & WINEDDOVER_SHOW)
3080     {
3081         if (surface->overlay_dest != dst_surface)
3082         {
3083             surface->overlay_dest = dst_surface;
3084             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3085         }
3086     }
3087     else if (flags & WINEDDOVER_HIDE)
3088     {
3089         /* tests show that the rectangles are erased on hide */
3090         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3091         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3092         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3093         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3094         surface->overlay_dest = NULL;
3095     }
3096
3097     surface->surface_ops->surface_draw_overlay(surface);
3098
3099     return WINED3D_OK;
3100 }
3101
3102 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3103 {
3104     TRACE("surface %p, clipper %p.\n", surface, clipper);
3105
3106     surface->clipper = clipper;
3107
3108     return WINED3D_OK;
3109 }
3110
3111 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3112 {
3113     TRACE("surface %p.\n", surface);
3114
3115     return surface->clipper;
3116 }
3117
3118 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3119 {
3120     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3121
3122     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3123
3124     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3125     {
3126         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3127         return WINED3DERR_INVALIDCALL;
3128     }
3129
3130     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3131             surface->pow2Width, surface->pow2Height);
3132     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3133     surface->resource.format = format;
3134
3135     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3136     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3137             format->glFormat, format->glInternal, format->glType);
3138
3139     return WINED3D_OK;
3140 }
3141
3142 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3143         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3144 {
3145     unsigned short *dst_s;
3146     const float *src_f;
3147     unsigned int x, y;
3148
3149     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3150
3151     for (y = 0; y < h; ++y)
3152     {
3153         src_f = (const float *)(src + y * pitch_in);
3154         dst_s = (unsigned short *) (dst + y * pitch_out);
3155         for (x = 0; x < w; ++x)
3156         {
3157             dst_s[x] = float_32_to_16(src_f + x);
3158         }
3159     }
3160 }
3161
3162 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3163         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3164 {
3165     static const unsigned char convert_5to8[] =
3166     {
3167         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3168         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3169         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3170         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3171     };
3172     static const unsigned char convert_6to8[] =
3173     {
3174         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3175         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3176         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3177         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3178         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3179         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3180         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3181         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3182     };
3183     unsigned int x, y;
3184
3185     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3186
3187     for (y = 0; y < h; ++y)
3188     {
3189         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3190         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3191         for (x = 0; x < w; ++x)
3192         {
3193             WORD pixel = src_line[x];
3194             dst_line[x] = 0xff000000
3195                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3196                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3197                     | convert_5to8[(pixel & 0x001f)];
3198         }
3199     }
3200 }
3201
3202 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3203         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3204 {
3205     unsigned int x, y;
3206
3207     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3208
3209     for (y = 0; y < h; ++y)
3210     {
3211         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3212         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3213
3214         for (x = 0; x < w; ++x)
3215         {
3216             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3217         }
3218     }
3219 }
3220
3221 static inline BYTE cliptobyte(int x)
3222 {
3223     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3224 }
3225
3226 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3227         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3228 {
3229     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3230     unsigned int x, y;
3231
3232     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3233
3234     for (y = 0; y < h; ++y)
3235     {
3236         const BYTE *src_line = src + y * pitch_in;
3237         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3238         for (x = 0; x < w; ++x)
3239         {
3240             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3241              *     C = Y - 16; D = U - 128; E = V - 128;
3242              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3243              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3244              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3245              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3246              * U and V are shared between the pixels. */
3247             if (!(x & 1)) /* For every even pixel, read new U and V. */
3248             {
3249                 d = (int) src_line[1] - 128;
3250                 e = (int) src_line[3] - 128;
3251                 r2 = 409 * e + 128;
3252                 g2 = - 100 * d - 208 * e + 128;
3253                 b2 = 516 * d + 128;
3254             }
3255             c2 = 298 * ((int) src_line[0] - 16);
3256             dst_line[x] = 0xff000000
3257                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3258                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3259                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3260                 /* Scale RGB values to 0..255 range,
3261                  * then clip them if still not in range (may be negative),
3262                  * then shift them within DWORD if necessary. */
3263             src_line += 2;
3264         }
3265     }
3266 }
3267
3268 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3269         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3270 {
3271     unsigned int x, y;
3272     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3273
3274     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3275
3276     for (y = 0; y < h; ++y)
3277     {
3278         const BYTE *src_line = src + y * pitch_in;
3279         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3280         for (x = 0; x < w; ++x)
3281         {
3282             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3283              *     C = Y - 16; D = U - 128; E = V - 128;
3284              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3285              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3286              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3287              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3288              * U and V are shared between the pixels. */
3289             if (!(x & 1)) /* For every even pixel, read new U and V. */
3290             {
3291                 d = (int) src_line[1] - 128;
3292                 e = (int) src_line[3] - 128;
3293                 r2 = 409 * e + 128;
3294                 g2 = - 100 * d - 208 * e + 128;
3295                 b2 = 516 * d + 128;
3296             }
3297             c2 = 298 * ((int) src_line[0] - 16);
3298             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3299                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3300                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3301                 /* Scale RGB values to 0..255 range,
3302                  * then clip them if still not in range (may be negative),
3303                  * then shift them within DWORD if necessary. */
3304             src_line += 2;
3305         }
3306     }
3307 }
3308
3309 struct d3dfmt_convertor_desc
3310 {
3311     enum wined3d_format_id from, to;
3312     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3313 };
3314
3315 static const struct d3dfmt_convertor_desc convertors[] =
3316 {
3317     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3318     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3319     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3320     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3321     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3322 };
3323
3324 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3325         enum wined3d_format_id to)
3326 {
3327     unsigned int i;
3328
3329     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3330     {
3331         if (convertors[i].from == from && convertors[i].to == to)
3332             return &convertors[i];
3333     }
3334
3335     return NULL;
3336 }
3337
3338 /*****************************************************************************
3339  * surface_convert_format
3340  *
3341  * Creates a duplicate of a surface in a different format. Is used by Blt to
3342  * blit between surfaces with different formats.
3343  *
3344  * Parameters
3345  *  source: Source surface
3346  *  fmt: Requested destination format
3347  *
3348  *****************************************************************************/
3349 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3350 {
3351     const struct d3dfmt_convertor_desc *conv;
3352     WINED3DLOCKED_RECT lock_src, lock_dst;
3353     struct wined3d_surface *ret = NULL;
3354     HRESULT hr;
3355
3356     conv = find_convertor(source->resource.format->id, to_fmt);
3357     if (!conv)
3358     {
3359         FIXME("Cannot find a conversion function from format %s to %s.\n",
3360                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3361         return NULL;
3362     }
3363
3364     wined3d_surface_create(source->resource.device, source->resource.width,
3365             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3366             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3367             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3368     if (!ret)
3369     {
3370         ERR("Failed to create a destination surface for conversion.\n");
3371         return NULL;
3372     }
3373
3374     memset(&lock_src, 0, sizeof(lock_src));
3375     memset(&lock_dst, 0, sizeof(lock_dst));
3376
3377     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3378     if (FAILED(hr))
3379     {
3380         ERR("Failed to lock the source surface.\n");
3381         wined3d_surface_decref(ret);
3382         return NULL;
3383     }
3384     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3385     if (FAILED(hr))
3386     {
3387         ERR("Failed to lock the destination surface.\n");
3388         wined3d_surface_unmap(source);
3389         wined3d_surface_decref(ret);
3390         return NULL;
3391     }
3392
3393     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3394             source->resource.width, source->resource.height);
3395
3396     wined3d_surface_unmap(ret);
3397     wined3d_surface_unmap(source);
3398
3399     return ret;
3400 }
3401
3402 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3403         unsigned int bpp, UINT pitch, DWORD color)
3404 {
3405     BYTE *first;
3406     int x, y;
3407
3408     /* Do first row */
3409
3410 #define COLORFILL_ROW(type) \
3411 do { \
3412     type *d = (type *)buf; \
3413     for (x = 0; x < width; ++x) \
3414         d[x] = (type)color; \
3415 } while(0)
3416
3417     switch (bpp)
3418     {
3419         case 1:
3420             COLORFILL_ROW(BYTE);
3421             break;
3422
3423         case 2:
3424             COLORFILL_ROW(WORD);
3425             break;
3426
3427         case 3:
3428         {
3429             BYTE *d = buf;
3430             for (x = 0; x < width; ++x, d += 3)
3431             {
3432                 d[0] = (color      ) & 0xFF;
3433                 d[1] = (color >>  8) & 0xFF;
3434                 d[2] = (color >> 16) & 0xFF;
3435             }
3436             break;
3437         }
3438         case 4:
3439             COLORFILL_ROW(DWORD);
3440             break;
3441
3442         default:
3443             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3444             return WINED3DERR_NOTAVAILABLE;
3445     }
3446
3447 #undef COLORFILL_ROW
3448
3449     /* Now copy first row. */
3450     first = buf;
3451     for (y = 1; y < height; ++y)
3452     {
3453         buf += pitch;
3454         memcpy(buf, first, width * bpp);
3455     }
3456
3457     return WINED3D_OK;
3458 }
3459
3460 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3461 {
3462     TRACE("surface %p.\n", surface);
3463
3464     if (!(surface->flags & SFLAG_LOCKED))
3465     {
3466         WARN("Trying to unmap unmapped surface.\n");
3467         return WINEDDERR_NOTLOCKED;
3468     }
3469     surface->flags &= ~SFLAG_LOCKED;
3470
3471     surface->surface_ops->surface_unmap(surface);
3472
3473     return WINED3D_OK;
3474 }
3475
3476 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3477         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3478 {
3479     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3480             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3481
3482     if (surface->flags & SFLAG_LOCKED)
3483     {
3484         WARN("Surface is already mapped.\n");
3485         return WINED3DERR_INVALIDCALL;
3486     }
3487     surface->flags |= SFLAG_LOCKED;
3488
3489     if (!(surface->flags & SFLAG_LOCKABLE))
3490         WARN("Trying to lock unlockable surface.\n");
3491
3492     surface->surface_ops->surface_map(surface, rect, flags);
3493
3494     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3495
3496     if (!rect)
3497     {
3498         locked_rect->pBits = surface->resource.allocatedMemory;
3499         surface->lockedRect.left = 0;
3500         surface->lockedRect.top = 0;
3501         surface->lockedRect.right = surface->resource.width;
3502         surface->lockedRect.bottom = surface->resource.height;
3503     }
3504     else
3505     {
3506         const struct wined3d_format *format = surface->resource.format;
3507
3508         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3509         {
3510             /* Compressed textures are block based, so calculate the offset of
3511              * the block that contains the top-left pixel of the locked rectangle. */
3512             locked_rect->pBits = surface->resource.allocatedMemory
3513                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3514                     + ((rect->left / format->block_width) * format->block_byte_count);
3515         }
3516         else
3517         {
3518             locked_rect->pBits = surface->resource.allocatedMemory
3519                     + (locked_rect->Pitch * rect->top)
3520                     + (rect->left * format->byte_count);
3521         }
3522         surface->lockedRect.left = rect->left;
3523         surface->lockedRect.top = rect->top;
3524         surface->lockedRect.right = rect->right;
3525         surface->lockedRect.bottom = rect->bottom;
3526     }
3527
3528     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3529     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3530
3531     return WINED3D_OK;
3532 }
3533
3534 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3535 {
3536     HRESULT hr;
3537
3538     TRACE("surface %p, dc %p.\n", surface, dc);
3539
3540     if (surface->flags & SFLAG_USERPTR)
3541     {
3542         ERR("Not supported on surfaces with application-provided memory.\n");
3543         return WINEDDERR_NODC;
3544     }
3545
3546     /* Give more detailed info for ddraw. */
3547     if (surface->flags & SFLAG_DCINUSE)
3548         return WINEDDERR_DCALREADYCREATED;
3549
3550     /* Can't GetDC if the surface is locked. */
3551     if (surface->flags & SFLAG_LOCKED)
3552         return WINED3DERR_INVALIDCALL;
3553
3554     hr = surface->surface_ops->surface_getdc(surface);
3555     if (FAILED(hr))
3556         return hr;
3557
3558     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3559             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3560     {
3561         /* GetDC on palettized formats is unsupported in D3D9, and the method
3562          * is missing in D3D8, so this should only be used for DX <=7
3563          * surfaces (with non-device palettes). */
3564         const PALETTEENTRY *pal = NULL;
3565
3566         if (surface->palette)
3567         {
3568             pal = surface->palette->palents;
3569         }
3570         else
3571         {
3572             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3573             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3574
3575             if (dds_primary && dds_primary->palette)
3576                 pal = dds_primary->palette->palents;
3577         }
3578
3579         if (pal)
3580         {
3581             RGBQUAD col[256];
3582             unsigned int i;
3583
3584             for (i = 0; i < 256; ++i)
3585             {
3586                 col[i].rgbRed = pal[i].peRed;
3587                 col[i].rgbGreen = pal[i].peGreen;
3588                 col[i].rgbBlue = pal[i].peBlue;
3589                 col[i].rgbReserved = 0;
3590             }
3591             SetDIBColorTable(surface->hDC, 0, 256, col);
3592         }
3593     }
3594
3595     surface->flags |= SFLAG_DCINUSE;
3596
3597     *dc = surface->hDC;
3598     TRACE("Returning dc %p.\n", *dc);
3599
3600     return WINED3D_OK;
3601 }
3602
3603 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3604 {
3605     TRACE("surface %p, dc %p.\n", surface, dc);
3606
3607     if (!(surface->flags & SFLAG_DCINUSE))
3608         return WINEDDERR_NODC;
3609
3610     if (surface->hDC != dc)
3611     {
3612         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3613                 dc, surface->hDC);
3614         return WINEDDERR_NODC;
3615     }
3616
3617     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3618     {
3619         /* Copy the contents of the DIB over to the PBO. */
3620         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->dib.bitmap_size);
3621     }
3622
3623     /* We locked first, so unlock now. */
3624     wined3d_surface_unmap(surface);
3625
3626     surface->flags &= ~SFLAG_DCINUSE;
3627
3628     return WINED3D_OK;
3629 }
3630
3631 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3632 {
3633     struct wined3d_swapchain *swapchain;
3634     HRESULT hr;
3635
3636     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3637
3638     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
3639     {
3640         ERR("Flipped surface is not on a swapchain.\n");
3641         return WINEDDERR_NOTFLIPPABLE;
3642     }
3643     swapchain = surface->container.u.swapchain;
3644
3645     hr = surface->surface_ops->surface_flip(surface, override);
3646     if (FAILED(hr))
3647         return hr;
3648
3649     /* Just overwrite the swapchain presentation interval. This is ok because
3650      * only ddraw apps can call Flip, and only d3d8 and d3d9 applications
3651      * specify the presentation interval. */
3652     if (!(flags & (WINEDDFLIP_NOVSYNC | WINEDDFLIP_INTERVAL2 | WINEDDFLIP_INTERVAL3 | WINEDDFLIP_INTERVAL4)))
3653         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_ONE;
3654     else if (flags & WINEDDFLIP_NOVSYNC)
3655         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
3656     else if (flags & WINEDDFLIP_INTERVAL2)
3657         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_TWO;
3658     else if (flags & WINEDDFLIP_INTERVAL3)
3659         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_THREE;
3660     else
3661         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_FOUR;
3662
3663     return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
3664 }
3665
3666 /* Do not call while under the GL lock. */
3667 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3668 {
3669     struct wined3d_device *device = surface->resource.device;
3670
3671     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3672
3673     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3674     {
3675         struct wined3d_texture *texture = surface->container.u.texture;
3676
3677         TRACE("Passing to container (%p).\n", texture);
3678         texture->texture_ops->texture_preload(texture, srgb);
3679     }
3680     else
3681     {
3682         struct wined3d_context *context = NULL;
3683
3684         TRACE("(%p) : About to load surface\n", surface);
3685
3686         if (!device->isInDraw) context = context_acquire(device, NULL);
3687
3688         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3689                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3690         {
3691             if (palette9_changed(surface))
3692             {
3693                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3694                 /* TODO: This is not necessarily needed with hw palettized texture support */
3695                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3696                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3697                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3698             }
3699         }
3700
3701         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3702
3703         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3704         {
3705             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3706             GLclampf tmp;
3707             tmp = 0.9f;
3708             ENTER_GL();
3709             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3710             LEAVE_GL();
3711         }
3712
3713         if (context) context_release(context);
3714     }
3715 }
3716
3717 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3718 {
3719     if (!surface->resource.allocatedMemory)
3720     {
3721         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3722                 surface->resource.size + RESOURCE_ALIGNMENT);
3723         if (!surface->resource.heapMemory)
3724         {
3725             ERR("Out of memory\n");
3726             return FALSE;
3727         }
3728         surface->resource.allocatedMemory =
3729             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3730     }
3731     else
3732     {
3733         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3734     }
3735
3736     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3737
3738     return TRUE;
3739 }
3740
3741 /* Read the framebuffer back into the surface */
3742 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3743 {
3744     struct wined3d_device *device = surface->resource.device;
3745     const struct wined3d_gl_info *gl_info;
3746     struct wined3d_context *context;
3747     BYTE *mem;
3748     GLint fmt;
3749     GLint type;
3750     BYTE *row, *top, *bottom;
3751     int i;
3752     BOOL bpp;
3753     RECT local_rect;
3754     BOOL srcIsUpsideDown;
3755     GLint rowLen = 0;
3756     GLint skipPix = 0;
3757     GLint skipRow = 0;
3758
3759     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3760         static BOOL warned = FALSE;
3761         if(!warned) {
3762             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3763             warned = TRUE;
3764         }
3765         return;
3766     }
3767
3768     context = context_acquire(device, surface);
3769     context_apply_blit_state(context, device);
3770     gl_info = context->gl_info;
3771
3772     ENTER_GL();
3773
3774     /* Select the correct read buffer, and give some debug output.
3775      * There is no need to keep track of the current read buffer or reset it, every part of the code
3776      * that reads sets the read buffer as desired.
3777      */
3778     if (surface_is_offscreen(surface))
3779     {
3780         /* Mapping the primary render target which is not on a swapchain.
3781          * Read from the back buffer. */
3782         TRACE("Mapping offscreen render target.\n");
3783         glReadBuffer(device->offscreenBuffer);
3784         srcIsUpsideDown = TRUE;
3785     }
3786     else
3787     {
3788         /* Onscreen surfaces are always part of a swapchain */
3789         GLenum buffer = surface_get_gl_buffer(surface);
3790         TRACE("Mapping %#x buffer.\n", buffer);
3791         glReadBuffer(buffer);
3792         checkGLcall("glReadBuffer");
3793         srcIsUpsideDown = FALSE;
3794     }
3795
3796     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3797     if (!rect)
3798     {
3799         local_rect.left = 0;
3800         local_rect.top = 0;
3801         local_rect.right = surface->resource.width;
3802         local_rect.bottom = surface->resource.height;
3803     }
3804     else
3805     {
3806         local_rect = *rect;
3807     }
3808     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3809
3810     switch (surface->resource.format->id)
3811     {
3812         case WINED3DFMT_P8_UINT:
3813         {
3814             if (primary_render_target_is_p8(device))
3815             {
3816                 /* In case of P8 render targets the index is stored in the alpha component */
3817                 fmt = GL_ALPHA;
3818                 type = GL_UNSIGNED_BYTE;
3819                 mem = dest;
3820                 bpp = surface->resource.format->byte_count;
3821             }
3822             else
3823             {
3824                 /* GL can't return palettized data, so read ARGB pixels into a
3825                  * separate block of memory and convert them into palettized format
3826                  * in software. Slow, but if the app means to use palettized render
3827                  * targets and locks it...
3828                  *
3829                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
3830                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
3831                  * for the color channels when palettizing the colors.
3832                  */
3833                 fmt = GL_RGB;
3834                 type = GL_UNSIGNED_BYTE;
3835                 pitch *= 3;
3836                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
3837                 if (!mem)
3838                 {
3839                     ERR("Out of memory\n");
3840                     LEAVE_GL();
3841                     return;
3842                 }
3843                 bpp = surface->resource.format->byte_count * 3;
3844             }
3845         }
3846         break;
3847
3848         default:
3849             mem = dest;
3850             fmt = surface->resource.format->glFormat;
3851             type = surface->resource.format->glType;
3852             bpp = surface->resource.format->byte_count;
3853     }
3854
3855     if (surface->flags & SFLAG_PBO)
3856     {
3857         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
3858         checkGLcall("glBindBufferARB");
3859         if (mem)
3860         {
3861             ERR("mem not null for pbo -- unexpected\n");
3862             mem = NULL;
3863         }
3864     }
3865
3866     /* Save old pixel store pack state */
3867     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
3868     checkGLcall("glGetIntegerv");
3869     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
3870     checkGLcall("glGetIntegerv");
3871     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
3872     checkGLcall("glGetIntegerv");
3873
3874     /* Setup pixel store pack state -- to glReadPixels into the correct place */
3875     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
3876     checkGLcall("glPixelStorei");
3877     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
3878     checkGLcall("glPixelStorei");
3879     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
3880     checkGLcall("glPixelStorei");
3881
3882     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
3883             local_rect.right - local_rect.left,
3884             local_rect.bottom - local_rect.top,
3885             fmt, type, mem);
3886     checkGLcall("glReadPixels");
3887
3888     /* Reset previous pixel store pack state */
3889     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
3890     checkGLcall("glPixelStorei");
3891     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
3892     checkGLcall("glPixelStorei");
3893     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
3894     checkGLcall("glPixelStorei");
3895
3896     if (surface->flags & SFLAG_PBO)
3897     {
3898         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
3899         checkGLcall("glBindBufferARB");
3900
3901         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
3902          * to get a pointer to it and perform the flipping in software. This is a lot
3903          * faster than calling glReadPixels for each line. In case we want more speed
3904          * we should rerender it flipped in a FBO and read the data back from the FBO. */
3905         if (!srcIsUpsideDown)
3906         {
3907             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
3908             checkGLcall("glBindBufferARB");
3909
3910             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
3911             checkGLcall("glMapBufferARB");
3912         }
3913     }
3914
3915     /* TODO: Merge this with the palettization loop below for P8 targets */
3916     if(!srcIsUpsideDown) {
3917         UINT len, off;
3918         /* glReadPixels returns the image upside down, and there is no way to prevent this.
3919             Flip the lines in software */
3920         len = (local_rect.right - local_rect.left) * bpp;
3921         off = local_rect.left * bpp;
3922
3923         row = HeapAlloc(GetProcessHeap(), 0, len);
3924         if(!row) {
3925             ERR("Out of memory\n");
3926             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
3927                 HeapFree(GetProcessHeap(), 0, mem);
3928             LEAVE_GL();
3929             return;
3930         }
3931
3932         top = mem + pitch * local_rect.top;
3933         bottom = mem + pitch * (local_rect.bottom - 1);
3934         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
3935             memcpy(row, top + off, len);
3936             memcpy(top + off, bottom + off, len);
3937             memcpy(bottom + off, row, len);
3938             top += pitch;
3939             bottom -= pitch;
3940         }
3941         HeapFree(GetProcessHeap(), 0, row);
3942
3943         /* Unmap the temp PBO buffer */
3944         if (surface->flags & SFLAG_PBO)
3945         {
3946             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
3947             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
3948         }
3949     }
3950
3951     LEAVE_GL();
3952     context_release(context);
3953
3954     /* For P8 textures we need to perform an inverse palette lookup. This is
3955      * done by searching for a palette index which matches the RGB value.
3956      * Note this isn't guaranteed to work when there are multiple entries for
3957      * the same color but we have no choice. In case of P8 render targets,
3958      * the index is stored in the alpha component so no conversion is needed. */
3959     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
3960     {
3961         const PALETTEENTRY *pal = NULL;
3962         DWORD width = pitch / 3;
3963         int x, y, c;
3964
3965         if (surface->palette)
3966         {
3967             pal = surface->palette->palents;
3968         }
3969         else
3970         {
3971             ERR("Palette is missing, cannot perform inverse palette lookup\n");
3972             HeapFree(GetProcessHeap(), 0, mem);
3973             return;
3974         }
3975
3976         for(y = local_rect.top; y < local_rect.bottom; y++) {
3977             for(x = local_rect.left; x < local_rect.right; x++) {
3978                 /*                      start              lines            pixels      */
3979                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
3980                 const BYTE *green = blue  + 1;
3981                 const BYTE *red = green + 1;
3982
3983                 for(c = 0; c < 256; c++) {
3984                     if(*red   == pal[c].peRed   &&
3985                        *green == pal[c].peGreen &&
3986                        *blue  == pal[c].peBlue)
3987                     {
3988                         *((BYTE *) dest + y * width + x) = c;
3989                         break;
3990                     }
3991                 }
3992             }
3993         }
3994         HeapFree(GetProcessHeap(), 0, mem);
3995     }
3996 }
3997
3998 /* Read the framebuffer contents into a texture */
3999 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
4000 {
4001     struct wined3d_device *device = surface->resource.device;
4002     const struct wined3d_gl_info *gl_info;
4003     struct wined3d_context *context;
4004
4005     if (!surface_is_offscreen(surface))
4006     {
4007         /* We would need to flip onscreen surfaces, but there's no efficient
4008          * way to do that here. It makes more sense for the caller to
4009          * explicitly go through sysmem. */
4010         ERR("Not supported for onscreen targets.\n");
4011         return;
4012     }
4013
4014     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
4015      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
4016      * states in the stateblock, and no driver was found yet that had bugs in that regard.
4017      */
4018     context = context_acquire(device, surface);
4019     gl_info = context->gl_info;
4020     device_invalidate_state(device, STATE_FRAMEBUFFER);
4021
4022     surface_prepare_texture(surface, gl_info, srgb);
4023     surface_bind_and_dirtify(surface, gl_info, srgb);
4024
4025     TRACE("Reading back offscreen render target %p.\n", surface);
4026
4027     ENTER_GL();
4028
4029     glReadBuffer(device->offscreenBuffer);
4030     checkGLcall("glReadBuffer");
4031
4032     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4033             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4034     checkGLcall("glCopyTexSubImage2D");
4035
4036     LEAVE_GL();
4037
4038     context_release(context);
4039 }
4040
4041 /* Context activation is done by the caller. */
4042 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4043         const struct wined3d_gl_info *gl_info, BOOL srgb)
4044 {
4045     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4046     CONVERT_TYPES convert;
4047     struct wined3d_format format;
4048
4049     if (surface->flags & alloc_flag) return;
4050
4051     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4052     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4053     else surface->flags &= ~SFLAG_CONVERTED;
4054
4055     surface_bind_and_dirtify(surface, gl_info, srgb);
4056     surface_allocate_surface(surface, gl_info, &format, srgb);
4057     surface->flags |= alloc_flag;
4058 }
4059
4060 /* Context activation is done by the caller. */
4061 void surface_prepare_texture(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
4062 {
4063     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4064     {
4065         struct wined3d_texture *texture = surface->container.u.texture;
4066         UINT sub_count = texture->level_count * texture->layer_count;
4067         UINT i;
4068
4069         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4070
4071         for (i = 0; i < sub_count; ++i)
4072         {
4073             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4074             surface_prepare_texture_internal(s, gl_info, srgb);
4075         }
4076
4077         return;
4078     }
4079
4080     surface_prepare_texture_internal(surface, gl_info, srgb);
4081 }
4082
4083 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4084         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4085 {
4086     struct wined3d_device *device = surface->resource.device;
4087     UINT pitch = wined3d_surface_get_pitch(surface);
4088     const struct wined3d_gl_info *gl_info;
4089     struct wined3d_context *context;
4090     RECT local_rect;
4091     UINT w, h;
4092
4093     surface_get_rect(surface, rect, &local_rect);
4094
4095     mem += local_rect.top * pitch + local_rect.left * bpp;
4096     w = local_rect.right - local_rect.left;
4097     h = local_rect.bottom - local_rect.top;
4098
4099     /* Activate the correct context for the render target */
4100     context = context_acquire(device, surface);
4101     context_apply_blit_state(context, device);
4102     gl_info = context->gl_info;
4103
4104     ENTER_GL();
4105
4106     if (!surface_is_offscreen(surface))
4107     {
4108         GLenum buffer = surface_get_gl_buffer(surface);
4109         TRACE("Unlocking %#x buffer.\n", buffer);
4110         context_set_draw_buffer(context, buffer);
4111
4112         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4113         glPixelZoom(1.0f, -1.0f);
4114     }
4115     else
4116     {
4117         /* Primary offscreen render target */
4118         TRACE("Offscreen render target.\n");
4119         context_set_draw_buffer(context, device->offscreenBuffer);
4120
4121         glPixelZoom(1.0f, 1.0f);
4122     }
4123
4124     glRasterPos3i(local_rect.left, local_rect.top, 1);
4125     checkGLcall("glRasterPos3i");
4126
4127     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4128     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4129
4130     if (surface->flags & SFLAG_PBO)
4131     {
4132         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4133         checkGLcall("glBindBufferARB");
4134     }
4135
4136     glDrawPixels(w, h, fmt, type, mem);
4137     checkGLcall("glDrawPixels");
4138
4139     if (surface->flags & SFLAG_PBO)
4140     {
4141         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4142         checkGLcall("glBindBufferARB");
4143     }
4144
4145     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4146     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4147
4148     LEAVE_GL();
4149
4150     if (wined3d_settings.strict_draw_ordering
4151             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4152             && surface->container.u.swapchain->front_buffer == surface))
4153         wglFlush();
4154
4155     context_release(context);
4156 }
4157
4158 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4159         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4160 {
4161     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4162     const struct wined3d_device *device = surface->resource.device;
4163     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4164     BOOL blit_supported = FALSE;
4165
4166     /* Copy the default values from the surface. Below we might perform fixups */
4167     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4168     *format = *surface->resource.format;
4169     *convert = NO_CONVERSION;
4170
4171     /* Ok, now look if we have to do any conversion */
4172     switch (surface->resource.format->id)
4173     {
4174         case WINED3DFMT_P8_UINT:
4175             /* Below the call to blit_supported is disabled for Wine 1.2
4176              * because the function isn't operating correctly yet. At the
4177              * moment 8-bit blits are handled in software and if certain GL
4178              * extensions are around, surface conversion is performed at
4179              * upload time. The blit_supported call recognizes it as a
4180              * destination fixup. This type of upload 'fixup' and 8-bit to
4181              * 8-bit blits need to be handled by the blit_shader.
4182              * TODO: get rid of this #if 0. */
4183 #if 0
4184             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4185                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4186                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4187 #endif
4188             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4189
4190             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4191              * texturing. Further also use conversion in case of color keying.
4192              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4193              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4194              * conflicts with this.
4195              */
4196             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4197                     || colorkey_active || !use_texturing)
4198             {
4199                 format->glFormat = GL_RGBA;
4200                 format->glInternal = GL_RGBA;
4201                 format->glType = GL_UNSIGNED_BYTE;
4202                 format->conv_byte_count = 4;
4203                 if (colorkey_active)
4204                     *convert = CONVERT_PALETTED_CK;
4205                 else
4206                     *convert = CONVERT_PALETTED;
4207             }
4208             break;
4209
4210         case WINED3DFMT_B2G3R3_UNORM:
4211             /* **********************
4212                 GL_UNSIGNED_BYTE_3_3_2
4213                 ********************** */
4214             if (colorkey_active) {
4215                 /* This texture format will never be used.. So do not care about color keying
4216                     up until the point in time it will be needed :-) */
4217                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4218             }
4219             break;
4220
4221         case WINED3DFMT_B5G6R5_UNORM:
4222             if (colorkey_active)
4223             {
4224                 *convert = CONVERT_CK_565;
4225                 format->glFormat = GL_RGBA;
4226                 format->glInternal = GL_RGB5_A1;
4227                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4228                 format->conv_byte_count = 2;
4229             }
4230             break;
4231
4232         case WINED3DFMT_B5G5R5X1_UNORM:
4233             if (colorkey_active)
4234             {
4235                 *convert = CONVERT_CK_5551;
4236                 format->glFormat = GL_BGRA;
4237                 format->glInternal = GL_RGB5_A1;
4238                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4239                 format->conv_byte_count = 2;
4240             }
4241             break;
4242
4243         case WINED3DFMT_B8G8R8_UNORM:
4244             if (colorkey_active)
4245             {
4246                 *convert = CONVERT_CK_RGB24;
4247                 format->glFormat = GL_RGBA;
4248                 format->glInternal = GL_RGBA8;
4249                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4250                 format->conv_byte_count = 4;
4251             }
4252             break;
4253
4254         case WINED3DFMT_B8G8R8X8_UNORM:
4255             if (colorkey_active)
4256             {
4257                 *convert = CONVERT_RGB32_888;
4258                 format->glFormat = GL_RGBA;
4259                 format->glInternal = GL_RGBA8;
4260                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4261                 format->conv_byte_count = 4;
4262             }
4263             break;
4264
4265         default:
4266             break;
4267     }
4268
4269     return WINED3D_OK;
4270 }
4271
4272 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4273 {
4274     const struct wined3d_device *device = surface->resource.device;
4275     const struct wined3d_palette *pal = surface->palette;
4276     BOOL index_in_alpha = FALSE;
4277     unsigned int i;
4278
4279     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4280      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4281      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4282      * duplicate entries. Store the color key in the unused alpha component to speed the
4283      * download up and to make conversion unneeded. */
4284     index_in_alpha = primary_render_target_is_p8(device);
4285
4286     if (!pal)
4287     {
4288         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4289         if (device->wined3d->flags & WINED3D_PALETTE_PER_SURFACE)
4290         {
4291             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4292             if (index_in_alpha)
4293             {
4294                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4295                  * there's no palette at this time. */
4296                 for (i = 0; i < 256; i++) table[i][3] = i;
4297             }
4298         }
4299         else
4300         {
4301             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4302              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4303              * capability flag is present (wine does advertise this capability) */
4304             for (i = 0; i < 256; ++i)
4305             {
4306                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4307                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4308                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4309                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4310             }
4311         }
4312     }
4313     else
4314     {
4315         TRACE("Using surface palette %p\n", pal);
4316         /* Get the surface's palette */
4317         for (i = 0; i < 256; ++i)
4318         {
4319             table[i][0] = pal->palents[i].peRed;
4320             table[i][1] = pal->palents[i].peGreen;
4321             table[i][2] = pal->palents[i].peBlue;
4322
4323             /* When index_in_alpha is set the palette index is stored in the
4324              * alpha component. In case of a readback we can then read
4325              * GL_ALPHA. Color keying is handled in BltOverride using a
4326              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4327              * color key itself is passed to glAlphaFunc in other cases the
4328              * alpha component of pixels that should be masked away is set to 0. */
4329             if (index_in_alpha)
4330             {
4331                 table[i][3] = i;
4332             }
4333             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4334                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4335             {
4336                 table[i][3] = 0x00;
4337             }
4338             else if (pal->flags & WINEDDPCAPS_ALPHA)
4339             {
4340                 table[i][3] = pal->palents[i].peFlags;
4341             }
4342             else
4343             {
4344                 table[i][3] = 0xFF;
4345             }
4346         }
4347     }
4348 }
4349
4350 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4351         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4352 {
4353     const BYTE *source;
4354     BYTE *dest;
4355     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4356
4357     switch (convert) {
4358         case NO_CONVERSION:
4359         {
4360             memcpy(dst, src, pitch * height);
4361             break;
4362         }
4363         case CONVERT_PALETTED:
4364         case CONVERT_PALETTED_CK:
4365         {
4366             BYTE table[256][4];
4367             unsigned int x, y;
4368
4369             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4370
4371             for (y = 0; y < height; y++)
4372             {
4373                 source = src + pitch * y;
4374                 dest = dst + outpitch * y;
4375                 /* This is an 1 bpp format, using the width here is fine */
4376                 for (x = 0; x < width; x++) {
4377                     BYTE color = *source++;
4378                     *dest++ = table[color][0];
4379                     *dest++ = table[color][1];
4380                     *dest++ = table[color][2];
4381                     *dest++ = table[color][3];
4382                 }
4383             }
4384         }
4385         break;
4386
4387         case CONVERT_CK_565:
4388         {
4389             /* Converting the 565 format in 5551 packed to emulate color-keying.
4390
4391               Note : in all these conversion, it would be best to average the averaging
4392                       pixels to get the color of the pixel that will be color-keyed to
4393                       prevent 'color bleeding'. This will be done later on if ever it is
4394                       too visible.
4395
4396               Note2: Nvidia documents say that their driver does not support alpha + color keying
4397                      on the same surface and disables color keying in such a case
4398             */
4399             unsigned int x, y;
4400             const WORD *Source;
4401             WORD *Dest;
4402
4403             TRACE("Color keyed 565\n");
4404
4405             for (y = 0; y < height; y++) {
4406                 Source = (const WORD *)(src + y * pitch);
4407                 Dest = (WORD *) (dst + y * outpitch);
4408                 for (x = 0; x < width; x++ ) {
4409                     WORD color = *Source++;
4410                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4411                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4412                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4413                         *Dest |= 0x0001;
4414                     Dest++;
4415                 }
4416             }
4417         }
4418         break;
4419
4420         case CONVERT_CK_5551:
4421         {
4422             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4423             unsigned int x, y;
4424             const WORD *Source;
4425             WORD *Dest;
4426             TRACE("Color keyed 5551\n");
4427             for (y = 0; y < height; y++) {
4428                 Source = (const WORD *)(src + y * pitch);
4429                 Dest = (WORD *) (dst + y * outpitch);
4430                 for (x = 0; x < width; x++ ) {
4431                     WORD color = *Source++;
4432                     *Dest = color;
4433                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4434                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4435                         *Dest |= (1 << 15);
4436                     else
4437                         *Dest &= ~(1 << 15);
4438                     Dest++;
4439                 }
4440             }
4441         }
4442         break;
4443
4444         case CONVERT_CK_RGB24:
4445         {
4446             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4447             unsigned int x, y;
4448             for (y = 0; y < height; y++)
4449             {
4450                 source = src + pitch * y;
4451                 dest = dst + outpitch * y;
4452                 for (x = 0; x < width; x++) {
4453                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4454                     DWORD dstcolor = color << 8;
4455                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4456                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4457                         dstcolor |= 0xff;
4458                     *(DWORD*)dest = dstcolor;
4459                     source += 3;
4460                     dest += 4;
4461                 }
4462             }
4463         }
4464         break;
4465
4466         case CONVERT_RGB32_888:
4467         {
4468             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4469             unsigned int x, y;
4470             for (y = 0; y < height; y++)
4471             {
4472                 source = src + pitch * y;
4473                 dest = dst + outpitch * y;
4474                 for (x = 0; x < width; x++) {
4475                     DWORD color = 0xffffff & *(const DWORD*)source;
4476                     DWORD dstcolor = color << 8;
4477                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4478                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4479                         dstcolor |= 0xff;
4480                     *(DWORD*)dest = dstcolor;
4481                     source += 4;
4482                     dest += 4;
4483                 }
4484             }
4485         }
4486         break;
4487
4488         default:
4489             ERR("Unsupported conversion type %#x.\n", convert);
4490     }
4491     return WINED3D_OK;
4492 }
4493
4494 BOOL palette9_changed(struct wined3d_surface *surface)
4495 {
4496     struct wined3d_device *device = surface->resource.device;
4497
4498     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4499             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4500     {
4501         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4502          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4503          */
4504         return FALSE;
4505     }
4506
4507     if (surface->palette9)
4508     {
4509         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4510         {
4511             return FALSE;
4512         }
4513     }
4514     else
4515     {
4516         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4517     }
4518     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4519
4520     return TRUE;
4521 }
4522
4523 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4524 {
4525     /* Flip the surface contents */
4526     /* Flip the DC */
4527     {
4528         HDC tmp;
4529         tmp = front->hDC;
4530         front->hDC = back->hDC;
4531         back->hDC = tmp;
4532     }
4533
4534     /* Flip the DIBsection */
4535     {
4536         HBITMAP tmp;
4537         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4538         tmp = front->dib.DIBsection;
4539         front->dib.DIBsection = back->dib.DIBsection;
4540         back->dib.DIBsection = tmp;
4541
4542         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4543         else front->flags &= ~SFLAG_DIBSECTION;
4544         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4545         else back->flags &= ~SFLAG_DIBSECTION;
4546     }
4547
4548     /* Flip the surface data */
4549     {
4550         void* tmp;
4551
4552         tmp = front->dib.bitmap_data;
4553         front->dib.bitmap_data = back->dib.bitmap_data;
4554         back->dib.bitmap_data = tmp;
4555
4556         tmp = front->resource.allocatedMemory;
4557         front->resource.allocatedMemory = back->resource.allocatedMemory;
4558         back->resource.allocatedMemory = tmp;
4559
4560         tmp = front->resource.heapMemory;
4561         front->resource.heapMemory = back->resource.heapMemory;
4562         back->resource.heapMemory = tmp;
4563     }
4564
4565     /* Flip the PBO */
4566     {
4567         GLuint tmp_pbo = front->pbo;
4568         front->pbo = back->pbo;
4569         back->pbo = tmp_pbo;
4570     }
4571
4572     /* client_memory should not be different, but just in case */
4573     {
4574         BOOL tmp;
4575         tmp = front->dib.client_memory;
4576         front->dib.client_memory = back->dib.client_memory;
4577         back->dib.client_memory = tmp;
4578     }
4579
4580     /* Flip the opengl texture */
4581     {
4582         GLuint tmp;
4583
4584         tmp = back->texture_name;
4585         back->texture_name = front->texture_name;
4586         front->texture_name = tmp;
4587
4588         tmp = back->texture_name_srgb;
4589         back->texture_name_srgb = front->texture_name_srgb;
4590         front->texture_name_srgb = tmp;
4591
4592         resource_unload(&back->resource);
4593         resource_unload(&front->resource);
4594     }
4595
4596     {
4597         DWORD tmp_flags = back->flags;
4598         back->flags = front->flags;
4599         front->flags = tmp_flags;
4600     }
4601 }
4602
/* Does a direct frame buffer -> texture copy using glCopyTexSubImage2D().
 *
 * src_rect is the area read from src_surface's framebuffer, dst_rect_in the
 * area written in dst_surface's texture. A dst_rect_in with top > bottom
 * requests a vertically flipped copy. Stretching is done with single
 * pixel (x direction) or single row (y direction) copy calls, which is slow.
 * Filter is only checked in order to complain: filtering other than
 * NONE / POINT isn't supported here. On return the texture is marked as the
 * up to date location of dst_surface. */
static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
        const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
{
    struct wined3d_device *device = dst_surface->resource.device;
    float xrel, yrel;
    UINT row;
    struct wined3d_context *context;
    BOOL upsidedown = FALSE;
    RECT dst_rect = *dst_rect_in;

    /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
     * glCopyTexSubImage is a bit picky about the parameters we pass to it
     */
    if(dst_rect.top > dst_rect.bottom) {
        UINT tmp = dst_rect.bottom;
        dst_rect.bottom = dst_rect.top;
        dst_rect.top = tmp;
        upsidedown = TRUE;
    }

    context = context_acquire(device, src_surface);
    context_apply_blit_state(context, device);
    surface_internal_preload(dst_surface, SRGB_RGB);
    ENTER_GL();

    /* Bind the target texture */
    glBindTexture(dst_surface->texture_target, dst_surface->texture_name);
    checkGLcall("glBindTexture");
    if (surface_is_offscreen(src_surface))
    {
        TRACE("Reading from an offscreen target\n");
        /* Offscreen targets toggle the flip flag compared to onscreen ones. */
        upsidedown = !upsidedown;
        glReadBuffer(device->offscreenBuffer);
    }
    else
    {
        glReadBuffer(surface_get_gl_buffer(src_surface));
    }
    checkGLcall("glReadBuffer");

    /* Source / destination size ratios; a value of 1.0 (within eps) means no
     * stretching in that direction.
     * NOTE(review): an empty destination rectangle makes these divisions by
     * zero - presumably callers never pass one; confirm. */
    xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
    yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);

    /* Only diagnostics here; the copy itself is done below regardless. */
    if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
    {
        FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");

        if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
            ERR("Texture filtering not supported in direct blit\n");
        }
    }
    else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
            && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
    {
        ERR("Texture filtering not supported in direct blit\n");
    }

    if (upsidedown
            && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
            && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
    {
        /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */

        glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
                dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
                src_rect->left, src_surface->resource.height - src_rect->bottom,
                dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
    }
    else
    {
        /* Framebuffer row that destination row dst_rect.top - 1 would map to
         * without stretching; the per-row source position is derived from it
         * below. */
        UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
        /* I have to process this row by row to swap the image,
         * otherwise it would be upside down, so stretching in y direction
         * doesn't cost extra time
         *
         * However, stretching in x direction can be avoided if not necessary
         */
        for(row = dst_rect.top; row < dst_rect.bottom; row++) {
            if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
            {
                /* Well, that stuff works, but it's very slow.
                 * find a better way instead
                 */
                UINT col;

                /* NOTE(review): col already starts at dst_rect.left, so the
                 * x offset "dst_rect.left + col" and the source position
                 * "src_rect->left + col * xrel" both appear to account for
                 * the left edge twice when dst_rect.left is non-zero -
                 * confirm whether that is intended. */
                for (col = dst_rect.left; col < dst_rect.right; ++col)
                {
                    glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
                            dst_rect.left + col /* x offset */, row /* y offset */,
                            src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
                }
            }
            else
            {
                /* No horizontal stretching: copy a full row at once. */
                glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
                        dst_rect.left /* x offset */, row /* y offset */,
                        src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
            }
        }
    }
    /* A single error check covers all of the copy calls above. */
    checkGLcall("glCopyTexSubImage2D");

    LEAVE_GL();
    context_release(context);

    /* The texture is now most up to date - If the surface is a render target and has a drawable, this
     * path is never entered
     */
    surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
}
4715
4716 /* Uses the hardware to stretch and flip the image */
4717 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4718         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4719 {
4720     struct wined3d_device *device = dst_surface->resource.device;
4721     struct wined3d_swapchain *src_swapchain = NULL;
4722     GLuint src, backup = 0;
4723     float left, right, top, bottom; /* Texture coordinates */
4724     UINT fbwidth = src_surface->resource.width;
4725     UINT fbheight = src_surface->resource.height;
4726     struct wined3d_context *context;
4727     GLenum drawBuffer = GL_BACK;
4728     GLenum texture_target;
4729     BOOL noBackBufferBackup;
4730     BOOL src_offscreen;
4731     BOOL upsidedown = FALSE;
4732     RECT dst_rect = *dst_rect_in;
4733
4734     TRACE("Using hwstretch blit\n");
4735     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4736     context = context_acquire(device, src_surface);
4737     context_apply_blit_state(context, device);
4738     surface_internal_preload(dst_surface, SRGB_RGB);
4739
4740     src_offscreen = surface_is_offscreen(src_surface);
4741     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4742     if (!noBackBufferBackup && !src_surface->texture_name)
4743     {
4744         /* Get it a description */
4745         surface_internal_preload(src_surface, SRGB_RGB);
4746     }
4747     ENTER_GL();
4748
4749     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4750      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4751      */
4752     if (context->aux_buffers >= 2)
4753     {
4754         /* Got more than one aux buffer? Use the 2nd aux buffer */
4755         drawBuffer = GL_AUX1;
4756     }
4757     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4758     {
4759         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4760         drawBuffer = GL_AUX0;
4761     }
4762
4763     if(noBackBufferBackup) {
4764         glGenTextures(1, &backup);
4765         checkGLcall("glGenTextures");
4766         glBindTexture(GL_TEXTURE_2D, backup);
4767         checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4768         texture_target = GL_TEXTURE_2D;
4769     } else {
4770         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4771          * we are reading from the back buffer, the backup can be used as source texture
4772          */
4773         texture_target = src_surface->texture_target;
4774         glBindTexture(texture_target, src_surface->texture_name);
4775         checkGLcall("glBindTexture(texture_target, src_surface->texture_name)");
4776         glEnable(texture_target);
4777         checkGLcall("glEnable(texture_target)");
4778
4779         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4780         src_surface->flags &= ~SFLAG_INTEXTURE;
4781     }
4782
4783     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4784      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4785      */
4786     if(dst_rect.top > dst_rect.bottom) {
4787         UINT tmp = dst_rect.bottom;
4788         dst_rect.bottom = dst_rect.top;
4789         dst_rect.top = tmp;
4790         upsidedown = TRUE;
4791     }
4792
4793     if (src_offscreen)
4794     {
4795         TRACE("Reading from an offscreen target\n");
4796         upsidedown = !upsidedown;
4797         glReadBuffer(device->offscreenBuffer);
4798     }
4799     else
4800     {
4801         glReadBuffer(surface_get_gl_buffer(src_surface));
4802     }
4803
4804     /* TODO: Only back up the part that will be overwritten */
4805     glCopyTexSubImage2D(texture_target, 0,
4806                         0, 0 /* read offsets */,
4807                         0, 0,
4808                         fbwidth,
4809                         fbheight);
4810
4811     checkGLcall("glCopyTexSubImage2D");
4812
4813     /* No issue with overriding these - the sampler is dirty due to blit usage */
4814     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4815             wined3d_gl_mag_filter(magLookup, Filter));
4816     checkGLcall("glTexParameteri");
4817     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
4818             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
4819     checkGLcall("glTexParameteri");
4820
4821     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4822         src_swapchain = src_surface->container.u.swapchain;
4823     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
4824     {
4825         src = backup ? backup : src_surface->texture_name;
4826     }
4827     else
4828     {
4829         glReadBuffer(GL_FRONT);
4830         checkGLcall("glReadBuffer(GL_FRONT)");
4831
4832         glGenTextures(1, &src);
4833         checkGLcall("glGenTextures(1, &src)");
4834         glBindTexture(GL_TEXTURE_2D, src);
4835         checkGLcall("glBindTexture(GL_TEXTURE_2D, src)");
4836
4837         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
4838          * out for power of 2 sizes
4839          */
4840         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
4841                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
4842         checkGLcall("glTexImage2D");
4843         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
4844                             0, 0 /* read offsets */,
4845                             0, 0,
4846                             fbwidth,
4847                             fbheight);
4848
4849         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
4850         checkGLcall("glTexParameteri");
4851         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
4852         checkGLcall("glTexParameteri");
4853
4854         glReadBuffer(GL_BACK);
4855         checkGLcall("glReadBuffer(GL_BACK)");
4856
4857         if(texture_target != GL_TEXTURE_2D) {
4858             glDisable(texture_target);
4859             glEnable(GL_TEXTURE_2D);
4860             texture_target = GL_TEXTURE_2D;
4861         }
4862     }
4863     checkGLcall("glEnd and previous");
4864
4865     left = src_rect->left;
4866     right = src_rect->right;
4867
4868     if (!upsidedown)
4869     {
4870         top = src_surface->resource.height - src_rect->top;
4871         bottom = src_surface->resource.height - src_rect->bottom;
4872     }
4873     else
4874     {
4875         top = src_surface->resource.height - src_rect->bottom;
4876         bottom = src_surface->resource.height - src_rect->top;
4877     }
4878
4879     if (src_surface->flags & SFLAG_NORMCOORD)
4880     {
4881         left /= src_surface->pow2Width;
4882         right /= src_surface->pow2Width;
4883         top /= src_surface->pow2Height;
4884         bottom /= src_surface->pow2Height;
4885     }
4886
4887     /* draw the source texture stretched and upside down. The correct surface is bound already */
4888     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
4889     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
4890
4891     context_set_draw_buffer(context, drawBuffer);
4892     glReadBuffer(drawBuffer);
4893
4894     glBegin(GL_QUADS);
4895         /* bottom left */
4896         glTexCoord2f(left, bottom);
4897         glVertex2i(0, 0);
4898
4899         /* top left */
4900         glTexCoord2f(left, top);
4901         glVertex2i(0, dst_rect.bottom - dst_rect.top);
4902
4903         /* top right */
4904         glTexCoord2f(right, top);
4905         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4906
4907         /* bottom right */
4908         glTexCoord2f(right, bottom);
4909         glVertex2i(dst_rect.right - dst_rect.left, 0);
4910     glEnd();
4911     checkGLcall("glEnd and previous");
4912
4913     if (texture_target != dst_surface->texture_target)
4914     {
4915         glDisable(texture_target);
4916         glEnable(dst_surface->texture_target);
4917         texture_target = dst_surface->texture_target;
4918     }
4919
4920     /* Now read the stretched and upside down image into the destination texture */
4921     glBindTexture(texture_target, dst_surface->texture_name);
4922     checkGLcall("glBindTexture");
4923     glCopyTexSubImage2D(texture_target,
4924                         0,
4925                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
4926                         0, 0, /* We blitted the image to the origin */
4927                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4928     checkGLcall("glCopyTexSubImage2D");
4929
4930     if(drawBuffer == GL_BACK) {
4931         /* Write the back buffer backup back */
4932         if(backup) {
4933             if(texture_target != GL_TEXTURE_2D) {
4934                 glDisable(texture_target);
4935                 glEnable(GL_TEXTURE_2D);
4936                 texture_target = GL_TEXTURE_2D;
4937             }
4938             glBindTexture(GL_TEXTURE_2D, backup);
4939             checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4940         }
4941         else
4942         {
4943             if (texture_target != src_surface->texture_target)
4944             {
4945                 glDisable(texture_target);
4946                 glEnable(src_surface->texture_target);
4947                 texture_target = src_surface->texture_target;
4948             }
4949             glBindTexture(src_surface->texture_target, src_surface->texture_name);
4950             checkGLcall("glBindTexture(src_surface->texture_target, src_surface->texture_name)");
4951         }
4952
4953         glBegin(GL_QUADS);
4954             /* top left */
4955             glTexCoord2f(0.0f, 0.0f);
4956             glVertex2i(0, fbheight);
4957
4958             /* bottom left */
4959             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
4960             glVertex2i(0, 0);
4961
4962             /* bottom right */
4963             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
4964                     (float)fbheight / (float)src_surface->pow2Height);
4965             glVertex2i(fbwidth, 0);
4966
4967             /* top right */
4968             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
4969             glVertex2i(fbwidth, fbheight);
4970         glEnd();
4971     }
4972     glDisable(texture_target);
4973     checkGLcall("glDisable(texture_target)");
4974
4975     /* Cleanup */
4976     if (src != src_surface->texture_name && src != backup)
4977     {
4978         glDeleteTextures(1, &src);
4979         checkGLcall("glDeleteTextures(1, &src)");
4980     }
4981     if(backup) {
4982         glDeleteTextures(1, &backup);
4983         checkGLcall("glDeleteTextures(1, &backup)");
4984     }
4985
4986     LEAVE_GL();
4987
4988     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
4989
4990     context_release(context);
4991
4992     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4993      * path is never entered
4994      */
4995     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4996 }
4997
4998 /* Front buffer coordinates are always full screen coordinates, but our GL
4999  * drawable is limited to the window's client area. The sysmem and texture
5000  * copies do have the full screen size. Note that GL has a bottom-left
5001  * origin, while D3D has a top-left origin. */
5002 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5003 {
5004     UINT drawable_height;
5005
5006     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5007             && surface == surface->container.u.swapchain->front_buffer)
5008     {
5009         POINT offset = {0, 0};
5010         RECT windowsize;
5011
5012         ScreenToClient(window, &offset);
5013         OffsetRect(rect, offset.x, offset.y);
5014
5015         GetClientRect(window, &windowsize);
5016         drawable_height = windowsize.bottom - windowsize.top;
5017     }
5018     else
5019     {
5020         drawable_height = surface->resource.height;
5021     }
5022
5023     rect->top = drawable_height - rect->top;
5024     rect->bottom = drawable_height - rect->bottom;
5025 }
5026
5027 /* blit between surface locations. onscreen on different swapchains is not supported.
5028  * depth / stencil is not supported. */
5029 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
5030         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
5031         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
5032 {
5033     const struct wined3d_gl_info *gl_info;
5034     struct wined3d_context *context;
5035     RECT src_rect, dst_rect;
5036     GLenum gl_filter;
5037     GLenum buffer;
5038
5039     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
5040     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
5041             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
5042     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
5043             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
5044
5045     src_rect = *src_rect_in;
5046     dst_rect = *dst_rect_in;
5047
5048     switch (filter)
5049     {
5050         case WINED3DTEXF_LINEAR:
5051             gl_filter = GL_LINEAR;
5052             break;
5053
5054         default:
5055             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
5056         case WINED3DTEXF_NONE:
5057         case WINED3DTEXF_POINT:
5058             gl_filter = GL_NEAREST;
5059             break;
5060     }
5061
5062     if (src_location == SFLAG_INDRAWABLE && surface_is_offscreen(src_surface))
5063         src_location = SFLAG_INTEXTURE;
5064     if (dst_location == SFLAG_INDRAWABLE && surface_is_offscreen(dst_surface))
5065         dst_location = SFLAG_INTEXTURE;
5066
5067     /* Make sure the locations are up-to-date. Loading the destination
5068      * surface isn't required if the entire surface is overwritten. (And is
5069      * in fact harmful if we're being called by surface_load_location() with
5070      * the purpose of loading the destination surface.) */
5071     surface_load_location(src_surface, src_location, NULL);
5072     if (!surface_is_full_rect(dst_surface, &dst_rect))
5073         surface_load_location(dst_surface, dst_location, NULL);
5074
5075     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
5076     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
5077     else context = context_acquire(device, NULL);
5078
5079     if (!context->valid)
5080     {
5081         context_release(context);
5082         WARN("Invalid context, skipping blit.\n");
5083         return;
5084     }
5085
5086     gl_info = context->gl_info;
5087
5088     if (src_location == SFLAG_INDRAWABLE)
5089     {
5090         TRACE("Source surface %p is onscreen.\n", src_surface);
5091         buffer = surface_get_gl_buffer(src_surface);
5092         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
5093     }
5094     else
5095     {
5096         TRACE("Source surface %p is offscreen.\n", src_surface);
5097         buffer = GL_COLOR_ATTACHMENT0;
5098     }
5099
5100     ENTER_GL();
5101     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
5102     glReadBuffer(buffer);
5103     checkGLcall("glReadBuffer()");
5104     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
5105     LEAVE_GL();
5106
5107     if (dst_location == SFLAG_INDRAWABLE)
5108     {
5109         TRACE("Destination surface %p is onscreen.\n", dst_surface);
5110         buffer = surface_get_gl_buffer(dst_surface);
5111         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5112     }
5113     else
5114     {
5115         TRACE("Destination surface %p is offscreen.\n", dst_surface);
5116         buffer = GL_COLOR_ATTACHMENT0;
5117     }
5118
5119     ENTER_GL();
5120     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
5121     context_set_draw_buffer(context, buffer);
5122     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
5123     context_invalidate_state(context, STATE_FRAMEBUFFER);
5124
5125     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
5126     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
5127     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
5128     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
5129     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
5130
5131     glDisable(GL_SCISSOR_TEST);
5132     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
5133
5134     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
5135             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
5136     checkGLcall("glBlitFramebuffer()");
5137
5138     LEAVE_GL();
5139
5140     if (wined3d_settings.strict_draw_ordering
5141             || (dst_location == SFLAG_INDRAWABLE
5142             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
5143         wglFlush();
5144
5145     context_release(context);
5146 }
5147
5148 static void surface_blt_to_drawable(struct wined3d_device *device,
5149         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5150         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5151         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5152 {
5153     struct wined3d_context *context;
5154     RECT src_rect, dst_rect;
5155
5156     src_rect = *src_rect_in;
5157     dst_rect = *dst_rect_in;
5158
5159     /* Make sure the surface is up-to-date. This should probably use
5160      * surface_load_location() and worry about the destination surface too,
5161      * unless we're overwriting it completely. */
5162     surface_internal_preload(src_surface, SRGB_RGB);
5163
5164     /* Activate the destination context, set it up for blitting */
5165     context = context_acquire(device, dst_surface);
5166     context_apply_blit_state(context, device);
5167
5168     if (!surface_is_offscreen(dst_surface))
5169         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5170
5171     device->blitter->set_shader(device->blit_priv, context->gl_info, src_surface);
5172
5173     ENTER_GL();
5174
5175     if (color_key)
5176     {
5177         glEnable(GL_ALPHA_TEST);
5178         checkGLcall("glEnable(GL_ALPHA_TEST)");
5179
5180         /* When the primary render target uses P8, the alpha component
5181          * contains the palette index. Which means that the colorkey is one of
5182          * the palette entries. In other cases pixels that should be masked
5183          * away have alpha set to 0. */
5184         if (primary_render_target_is_p8(device))
5185             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5186         else
5187             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5188         checkGLcall("glAlphaFunc");
5189     }
5190     else
5191     {
5192         glDisable(GL_ALPHA_TEST);
5193         checkGLcall("glDisable(GL_ALPHA_TEST)");
5194     }
5195
5196     draw_textured_quad(src_surface, &src_rect, &dst_rect, filter);
5197
5198     if (color_key)
5199     {
5200         glDisable(GL_ALPHA_TEST);
5201         checkGLcall("glDisable(GL_ALPHA_TEST)");
5202     }
5203
5204     LEAVE_GL();
5205
5206     /* Leave the opengl state valid for blitting */
5207     device->blitter->unset_shader(context->gl_info);
5208
5209     if (wined3d_settings.strict_draw_ordering
5210             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5211             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5212         wglFlush(); /* Flush to ensure ordering across contexts. */
5213
5214     context_release(context);
5215 }
5216
5217 /* Do not call while under the GL lock. */
5218 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5219 {
5220     struct wined3d_device *device = s->resource.device;
5221     const struct blit_shader *blitter;
5222
5223     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5224             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5225     if (!blitter)
5226     {
5227         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5228         return WINED3DERR_INVALIDCALL;
5229     }
5230
5231     return blitter->color_fill(device, s, rect, color);
5232 }
5233
5234 /* Do not call while under the GL lock. */
5235 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5236         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5237         WINED3DTEXTUREFILTERTYPE Filter)
5238 {
5239     struct wined3d_device *device = dst_surface->resource.device;
5240     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5241     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5242
5243     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5244             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5245             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5246
5247     /* Get the swapchain. One of the surfaces has to be a primary surface */
5248     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5249     {
5250         WARN("Destination is in sysmem, rejecting gl blt\n");
5251         return WINED3DERR_INVALIDCALL;
5252     }
5253
5254     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5255         dstSwapchain = dst_surface->container.u.swapchain;
5256
5257     if (src_surface)
5258     {
5259         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5260         {
5261             WARN("Src is in sysmem, rejecting gl blt\n");
5262             return WINED3DERR_INVALIDCALL;
5263         }
5264
5265         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5266             srcSwapchain = src_surface->container.u.swapchain;
5267     }
5268
5269     /* Early sort out of cases where no render target is used */
5270     if (!dstSwapchain && !srcSwapchain
5271             && src_surface != device->fb.render_targets[0]
5272             && dst_surface != device->fb.render_targets[0])
5273     {
5274         TRACE("No surface is render target, not using hardware blit.\n");
5275         return WINED3DERR_INVALIDCALL;
5276     }
5277
5278     /* No destination color keying supported */
5279     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5280     {
5281         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5282         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5283         return WINED3DERR_INVALIDCALL;
5284     }
5285
5286     /* The only case where both surfaces on a swapchain are supported is a back buffer -> front buffer blit on the same swapchain */
5287     if (dstSwapchain && dstSwapchain == srcSwapchain && dstSwapchain->back_buffers
5288             && dst_surface == dstSwapchain->front_buffer
5289             && src_surface == dstSwapchain->back_buffers[0])
5290     {
5291         /* Half-Life does a Blt from the back buffer to the front buffer,
5292          * Full surface size, no flags... Use present instead
5293          *
5294          * This path will only be entered for d3d7 and ddraw apps, because d3d8/9 offer no way to blit TO the front buffer
5295          */
5296
5297         /* Check rects - wined3d_swapchain_present() doesn't handle them. */
5298         for (;;)
5299         {
5300             TRACE("Looking if a Present can be done...\n");
5301             /* Source Rectangle must be full surface */
5302             if (src_rect->left || src_rect->top
5303                     || src_rect->right != src_surface->resource.width
5304                     || src_rect->bottom != src_surface->resource.height)
5305             {
5306                 TRACE("No, Source rectangle doesn't match\n");
5307                 break;
5308             }
5309
5310             /* No stretching may occur */
5311             if (src_rect->right != dst_rect->right - dst_rect->left
5312                     || src_rect->bottom != dst_rect->bottom - dst_rect->top)
5313             {
5314                 TRACE("No, stretching is done\n");
5315                 break;
5316             }
5317
5318             /* Destination must be full surface or match the clipping rectangle */
5319             if (dst_surface->clipper && dst_surface->clipper->hWnd)
5320             {
5321                 RECT cliprect;
5322                 POINT pos[2];
5323                 GetClientRect(dst_surface->clipper->hWnd, &cliprect);
5324                 pos[0].x = dst_rect->left;
5325                 pos[0].y = dst_rect->top;
5326                 pos[1].x = dst_rect->right;
5327                 pos[1].y = dst_rect->bottom;
5328                 MapWindowPoints(GetDesktopWindow(), dst_surface->clipper->hWnd, pos, 2);
5329
5330                 if (pos[0].x != cliprect.left || pos[0].y != cliprect.top
5331                         || pos[1].x != cliprect.right || pos[1].y != cliprect.bottom)
5332                 {
5333                     TRACE("No, dest rectangle doesn't match(clipper)\n");
5334                     TRACE("Clip rect at %s\n", wine_dbgstr_rect(&cliprect));
5335                     TRACE("Blt dest: %s\n", wine_dbgstr_rect(dst_rect));
5336                     break;
5337                 }
5338             }
5339             else if (dst_rect->left || dst_rect->top
5340                     || dst_rect->right != dst_surface->resource.width
5341                     || dst_rect->bottom != dst_surface->resource.height)
5342             {
5343                 TRACE("No, dest rectangle doesn't match(surface size)\n");
5344                 break;
5345             }
5346
5347             TRACE("Yes\n");
5348
5349             /* These flags are unimportant for the flag check, remove them */
5350             if (!(flags & ~(WINEDDBLT_DONOTWAIT | WINEDDBLT_WAIT)))
5351             {
5352                 WINED3DSWAPEFFECT orig_swap = dstSwapchain->presentParms.SwapEffect;
5353
5354                 /* The idea behind this is that a glReadPixels and a glDrawPixels call
5355                     * take very long, while a flip is fast.
5356                     * This applies to Half-Life, which does such Blts every time it finished
5357                     * a frame, and to Prince of Persia 3D, which uses this to draw at least the main
5358                     * menu. This is also used by all apps when they do windowed rendering
5359                     *
5360                     * The problem is that flipping is not really the same as copying. After a
5361                     * Blt the front buffer is a copy of the back buffer, and the back buffer is
5362                     * untouched. Therefore it's necessary to override the swap effect
5363                     * and to set it back after the flip.
5364                     *
5365                     * Windowed Direct3D < 7 apps do the same. The D3D7 sdk demos are nice
5366                     * testcases.
5367                     */
5368
5369                 dstSwapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
5370                 dstSwapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
5371
5372                 TRACE("Full screen back buffer -> front buffer blt, performing a flip instead.\n");
5373                 wined3d_swapchain_present(dstSwapchain, NULL, NULL, dstSwapchain->win_handle, NULL, 0);
5374
5375                 dstSwapchain->presentParms.SwapEffect = orig_swap;
5376
5377                 return WINED3D_OK;
5378             }
5379             break;
5380         }
5381
5382         TRACE("Unsupported blit between buffers on the same swapchain\n");
5383         return WINED3DERR_INVALIDCALL;
5384     } else if(dstSwapchain && dstSwapchain == srcSwapchain) {
5385         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5386         return WINED3DERR_INVALIDCALL;
5387     } else if(dstSwapchain && srcSwapchain) {
5388         FIXME("Implement hardware blit between two different swapchains\n");
5389         return WINED3DERR_INVALIDCALL;
5390     }
5391     else if (dstSwapchain)
5392     {
5393         /* Handled with regular texture -> swapchain blit */
5394         if (src_surface == device->fb.render_targets[0])
5395             TRACE("Blit from active render target to a swapchain\n");
5396     }
5397     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5398     {
5399         FIXME("Implement blit from a swapchain to the active render target\n");
5400         return WINED3DERR_INVALIDCALL;
5401     }
5402
5403     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5404     {
5405         /* Blit from render target to texture */
5406         BOOL stretchx;
5407
5408         /* P8 read back is not implemented */
5409         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5410                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5411         {
5412             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5413             return WINED3DERR_INVALIDCALL;
5414         }
5415
5416         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5417         {
5418             TRACE("Color keying not supported by frame buffer to texture blit\n");
5419             return WINED3DERR_INVALIDCALL;
5420             /* Destination color key is checked above */
5421         }
5422
5423         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5424             stretchx = TRUE;
5425         else
5426             stretchx = FALSE;
5427
5428         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5429          * flip the image nor scale it.
5430          *
5431          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5432          * -> If the app wants a image width an unscaled width, copy it line per line
5433          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5434          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5435          *    back buffer. This is slower than reading line per line, thus not used for flipping
5436          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5437          *    pixel by pixel
5438          *
5439          * If EXT_framebuffer_blit is supported that can be used instead. Note that EXT_framebuffer_blit implies
5440          * FBO support, so it doesn't really make sense to try and make it work with different offscreen rendering
5441          * backends. */
5442         if (fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5443                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5444                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5445         {
5446             surface_blt_fbo(device, Filter,
5447                     src_surface, SFLAG_INDRAWABLE, src_rect,
5448                     dst_surface, SFLAG_INDRAWABLE, dst_rect);
5449             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5450         }
5451         else if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5452                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5453         {
5454             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5455             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5456         } else {
5457             TRACE("Using hardware stretching to flip / stretch the texture\n");
5458             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5459         }
5460
5461         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5462         {
5463             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5464             dst_surface->resource.allocatedMemory = NULL;
5465             dst_surface->resource.heapMemory = NULL;
5466         }
5467         else
5468         {
5469             dst_surface->flags &= ~SFLAG_INSYSMEM;
5470         }
5471
5472         return WINED3D_OK;
5473     }
5474     else if (src_surface)
5475     {
5476         /* Blit from offscreen surface to render target */
5477         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5478         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5479
5480         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5481
5482         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5483                 && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5484                         src_rect, src_surface->resource.usage, src_surface->resource.pool,
5485                         src_surface->resource.format,
5486                         dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5487                         dst_surface->resource.format))
5488         {
5489             TRACE("Using surface_blt_fbo.\n");
5490             /* The source is always a texture, but never the currently active render target, and the texture
5491              * contents are never upside down. */
5492             surface_blt_fbo(device, Filter,
5493                     src_surface, SFLAG_INDRAWABLE, src_rect,
5494                     dst_surface, SFLAG_INDRAWABLE, dst_rect);
5495             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5496             return WINED3D_OK;
5497         }
5498
5499         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5500                 && arbfp_blit.blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5501                         src_rect, src_surface->resource.usage, src_surface->resource.pool,
5502                         src_surface->resource.format,
5503                         dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5504                         dst_surface->resource.format))
5505             return arbfp_blit_surface(device, Filter, src_surface, src_rect, dst_surface, dst_rect);
5506
5507         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5508                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5509                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5510         {
5511             FIXME("Unsupported blit operation falling back to software\n");
5512             return WINED3DERR_INVALIDCALL;
5513         }
5514
5515         /* Color keying: Check if we have to do a color keyed blt,
5516          * and if not check if a color key is activated.
5517          *
5518          * Just modify the color keying parameters in the surface and restore them afterwards
5519          * The surface keeps track of the color key last used to load the opengl surface.
5520          * PreLoad will catch the change to the flags and color key and reload if necessary.
5521          */
5522         if (flags & WINEDDBLT_KEYSRC)
5523         {
5524             /* Use color key from surface */
5525         }
5526         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5527         {
5528             /* Use color key from DDBltFx */
5529             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5530             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5531         }
5532         else
5533         {
5534             /* Do not use color key */
5535             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5536         }
5537
5538         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5539                 src_surface, src_rect, dst_surface, dst_rect);
5540
5541         /* Restore the color key parameters */
5542         src_surface->CKeyFlags = oldCKeyFlags;
5543         src_surface->SrcBltCKey = oldBltCKey;
5544
5545         surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5546
5547         return WINED3D_OK;
5548     }
5549     else
5550     {
5551         /* Source-Less Blit to render target */
5552         if (flags & WINEDDBLT_COLORFILL)
5553         {
5554             WINED3DCOLORVALUE color;
5555
5556             TRACE("Colorfill\n");
5557
5558             /* The color as given in the Blt function is in the surface format. */
5559             if (!surface_convert_color_to_float(dst_surface, DDBltFx->u5.dwFillColor, &color))
5560                 return WINED3DERR_INVALIDCALL;
5561
5562             return surface_color_fill(dst_surface, dst_rect, &color);
5563         }
5564     }
5565
5566     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5567     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5568     return WINED3DERR_INVALIDCALL;
5569 }
5570
5571 /* GL locking is done by the caller */
5572 static void surface_depth_blt(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
5573         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5574 {
5575     struct wined3d_device *device = surface->resource.device;
5576     GLint compare_mode = GL_NONE;
5577     struct blt_info info;
5578     GLint old_binding = 0;
5579     RECT rect;
5580
5581     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5582
5583     glDisable(GL_CULL_FACE);
5584     glDisable(GL_BLEND);
5585     glDisable(GL_ALPHA_TEST);
5586     glDisable(GL_SCISSOR_TEST);
5587     glDisable(GL_STENCIL_TEST);
5588     glEnable(GL_DEPTH_TEST);
5589     glDepthFunc(GL_ALWAYS);
5590     glDepthMask(GL_TRUE);
5591     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5592     glViewport(x, y, w, h);
5593
5594     SetRect(&rect, 0, h, w, 0);
5595     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5596     GL_EXTCALL(glActiveTextureARB(GL_TEXTURE0_ARB));
5597     glGetIntegerv(info.binding, &old_binding);
5598     glBindTexture(info.bind_target, texture);
5599     if (gl_info->supported[ARB_SHADOW])
5600     {
5601         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5602         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5603     }
5604
5605     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5606             gl_info, info.tex_type, &surface->ds_current_size);
5607
5608     glBegin(GL_TRIANGLE_STRIP);
5609     glTexCoord3fv(info.coords[0]);
5610     glVertex2f(-1.0f, -1.0f);
5611     glTexCoord3fv(info.coords[1]);
5612     glVertex2f(1.0f, -1.0f);
5613     glTexCoord3fv(info.coords[2]);
5614     glVertex2f(-1.0f, 1.0f);
5615     glTexCoord3fv(info.coords[3]);
5616     glVertex2f(1.0f, 1.0f);
5617     glEnd();
5618
5619     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5620     glBindTexture(info.bind_target, old_binding);
5621
5622     glPopAttrib();
5623
5624     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5625 }
5626
5627 void surface_modify_ds_location(struct wined3d_surface *surface,
5628         DWORD location, UINT w, UINT h)
5629 {
5630     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5631
5632     if (location & ~SFLAG_DS_LOCATIONS)
5633         FIXME("Invalid location (%#x) specified.\n", location);
5634
5635     surface->ds_current_size.cx = w;
5636     surface->ds_current_size.cy = h;
5637     surface->flags &= ~SFLAG_DS_LOCATIONS;
5638     surface->flags |= location;
5639 }
5640
5641 /* Context activation is done by the caller. */
5642 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5643 {
5644     struct wined3d_device *device = surface->resource.device;
5645     const struct wined3d_gl_info *gl_info = context->gl_info;
5646     GLsizei w, h;
5647
5648     TRACE("surface %p, new location %#x.\n", surface, location);
5649
5650     /* TODO: Make this work for modes other than FBO */
5651     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5652
5653     if (!(surface->flags & location))
5654     {
5655         w = surface->ds_current_size.cx;
5656         h = surface->ds_current_size.cy;
5657         surface->ds_current_size.cx = 0;
5658         surface->ds_current_size.cy = 0;
5659     }
5660     else
5661     {
5662         w = surface->resource.width;
5663         h = surface->resource.height;
5664     }
5665
5666     if (surface->ds_current_size.cx == surface->resource.width
5667             && surface->ds_current_size.cy == surface->resource.height)
5668     {
5669         TRACE("Location (%#x) is already up to date.\n", location);
5670         return;
5671     }
5672
5673     if (surface->current_renderbuffer)
5674     {
5675         FIXME("Not supported with fixed up depth stencil.\n");
5676         return;
5677     }
5678
5679     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5680     {
5681         /* This mostly happens when a depth / stencil is used without being
5682          * cleared first. In principle we could upload from sysmem, or
5683          * explicitly clear before first usage. For the moment there don't
5684          * appear to be a lot of applications depending on this, so a FIXME
5685          * should do. */
5686         FIXME("No up to date depth stencil location.\n");
5687         surface->flags |= location;
5688         surface->ds_current_size.cx = surface->resource.width;
5689         surface->ds_current_size.cy = surface->resource.height;
5690         return;
5691     }
5692
5693     if (location == SFLAG_DS_OFFSCREEN)
5694     {
5695         GLint old_binding = 0;
5696         GLenum bind_target;
5697
5698         /* The render target is allowed to be smaller than the depth/stencil
5699          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5700          * than the offscreen surface. Don't overwrite the offscreen surface
5701          * with undefined data. */
5702         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5703         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5704
5705         TRACE("Copying onscreen depth buffer to depth texture.\n");
5706
5707         ENTER_GL();
5708
5709         if (!device->depth_blt_texture)
5710         {
5711             glGenTextures(1, &device->depth_blt_texture);
5712         }
5713
5714         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5715          * directly on the FBO texture. That's because we need to flip. */
5716         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5717                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5718         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5719         {
5720             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5721             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5722         }
5723         else
5724         {
5725             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5726             bind_target = GL_TEXTURE_2D;
5727         }
5728         glBindTexture(bind_target, device->depth_blt_texture);
5729         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5730          * internal format, because the internal format might include stencil
5731          * data. In principle we should copy stencil data as well, but unless
5732          * the driver supports stencil export it's hard to do, and doesn't
5733          * seem to be needed in practice. If the hardware doesn't support
5734          * writing stencil data, the glCopyTexImage2D() call might trigger
5735          * software fallbacks. */
5736         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5737         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5738         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5739         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5740         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5741         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5742         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5743         glBindTexture(bind_target, old_binding);
5744
5745         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5746                 NULL, surface, SFLAG_INTEXTURE);
5747         context_set_draw_buffer(context, GL_NONE);
5748         glReadBuffer(GL_NONE);
5749
5750         /* Do the actual blit */
5751         surface_depth_blt(surface, gl_info, device->depth_blt_texture, 0, 0, w, h, bind_target);
5752         checkGLcall("depth_blt");
5753
5754         context_invalidate_state(context, STATE_FRAMEBUFFER);
5755
5756         LEAVE_GL();
5757
5758         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5759     }
5760     else if (location == SFLAG_DS_ONSCREEN)
5761     {
5762         TRACE("Copying depth texture to onscreen depth buffer.\n");
5763
5764         ENTER_GL();
5765
5766         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5767                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5768         surface_depth_blt(surface, gl_info, surface->texture_name,
5769                 0, surface->pow2Height - h, w, h, surface->texture_target);
5770         checkGLcall("depth_blt");
5771
5772         context_invalidate_state(context, STATE_FRAMEBUFFER);
5773
5774         LEAVE_GL();
5775
5776         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5777     }
5778     else
5779     {
5780         ERR("Invalid location (%#x) specified.\n", location);
5781     }
5782
5783     surface->flags |= location;
5784     surface->ds_current_size.cx = surface->resource.width;
5785     surface->ds_current_size.cy = surface->resource.height;
5786 }
5787
5788 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5789 {
5790     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5791     struct wined3d_surface *overlay;
5792
5793     TRACE("surface %p, location %s, persistent %#x.\n",
5794             surface, debug_surflocation(location), persistent);
5795
5796     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
5797     {
5798         if (surface_is_offscreen(surface))
5799         {
5800             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same
5801              * for offscreen targets. */
5802             if (location & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE))
5803                 location |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
5804         }
5805         else
5806         {
5807             TRACE("Surface %p is an onscreen surface.\n", surface);
5808         }
5809     }
5810
5811     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5812             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5813         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5814
5815     if (persistent)
5816     {
5817         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5818                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5819         {
5820             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5821             {
5822                 TRACE("Passing to container.\n");
5823                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5824             }
5825         }
5826         surface->flags &= ~SFLAG_LOCATIONS;
5827         surface->flags |= location;
5828
5829         /* Redraw emulated overlays, if any */
5830         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5831         {
5832             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5833             {
5834                 overlay->surface_ops->surface_draw_overlay(overlay);
5835             }
5836         }
5837     }
5838     else
5839     {
5840         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5841         {
5842             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5843             {
5844                 TRACE("Passing to container\n");
5845                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5846             }
5847         }
5848         surface->flags &= ~location;
5849     }
5850
5851     if (!(surface->flags & SFLAG_LOCATIONS))
5852     {
5853         ERR("Surface %p does not have any up to date location.\n", surface);
5854     }
5855 }
5856
5857 static DWORD resource_access_from_location(DWORD location)
5858 {
5859     switch (location)
5860     {
5861         case SFLAG_INSYSMEM:
5862             return WINED3D_RESOURCE_ACCESS_CPU;
5863
5864         case SFLAG_INDRAWABLE:
5865         case SFLAG_INSRGBTEX:
5866         case SFLAG_INTEXTURE:
5867             return WINED3D_RESOURCE_ACCESS_GPU;
5868
5869         default:
5870             FIXME("Unhandled location %#x.\n", location);
5871             return 0;
5872     }
5873 }
5874
5875 static void surface_load_sysmem(struct wined3d_surface *surface,
5876         const struct wined3d_gl_info *gl_info, const RECT *rect)
5877 {
5878     surface_prepare_system_memory(surface);
5879
5880     /* Download the surface to system memory. */
5881     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5882     {
5883         struct wined3d_device *device = surface->resource.device;
5884         struct wined3d_context *context = NULL;
5885
5886         if (!device->isInDraw)
5887             context = context_acquire(device, NULL);
5888
5889         surface_bind_and_dirtify(surface, gl_info, !(surface->flags & SFLAG_INTEXTURE));
5890         surface_download_data(surface, gl_info);
5891
5892         if (context)
5893             context_release(context);
5894
5895         return;
5896     }
5897
5898     /* Note: It might be faster to download into a texture first. */
5899     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5900             wined3d_surface_get_pitch(surface));
5901 }
5902
5903 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5904         const struct wined3d_gl_info *gl_info, const RECT *rect)
5905 {
5906     struct wined3d_device *device = surface->resource.device;
5907     struct wined3d_format format;
5908     CONVERT_TYPES convert;
5909     UINT byte_count;
5910     BYTE *mem;
5911
5912     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5913         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5914
5915     if (surface->flags & SFLAG_INTEXTURE)
5916     {
5917         RECT r;
5918
5919         surface_get_rect(surface, rect, &r);
5920         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5921
5922         return WINED3D_OK;
5923     }
5924
5925     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5926     {
5927         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5928          * path through sysmem. */
5929         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5930     }
5931
5932     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5933
5934     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5935      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5936      * called. */
5937     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5938     {
5939         struct wined3d_context *context = NULL;
5940
5941         TRACE("Removing the pbo attached to surface %p.\n", surface);
5942
5943         if (!device->isInDraw)
5944             context = context_acquire(device, NULL);
5945
5946         surface_remove_pbo(surface, gl_info);
5947
5948         if (context)
5949             context_release(context);
5950     }
5951
5952     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5953     {
5954         UINT height = surface->resource.height;
5955         UINT width = surface->resource.width;
5956         UINT src_pitch, dst_pitch;
5957
5958         byte_count = format.conv_byte_count;
5959         src_pitch = wined3d_surface_get_pitch(surface);
5960
5961         /* Stick to the alignment for the converted surface too, makes it
5962          * easier to load the surface. */
5963         dst_pitch = width * byte_count;
5964         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5965
5966         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5967         {
5968             ERR("Out of memory (%u).\n", dst_pitch * height);
5969             return E_OUTOFMEMORY;
5970         }
5971
5972         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5973                 src_pitch, width, height, dst_pitch, convert, surface);
5974
5975         surface->flags |= SFLAG_CONVERTED;
5976     }
5977     else
5978     {
5979         surface->flags &= ~SFLAG_CONVERTED;
5980         mem = surface->resource.allocatedMemory;
5981         byte_count = format.byte_count;
5982     }
5983
5984     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5985
5986     /* Don't delete PBO memory. */
5987     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5988         HeapFree(GetProcessHeap(), 0, mem);
5989
5990     return WINED3D_OK;
5991 }
5992
5993 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5994         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5995 {
5996     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
5997     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5998     struct wined3d_device *device = surface->resource.device;
5999     struct wined3d_context *context = NULL;
6000     UINT width, src_pitch, dst_pitch;
6001     struct wined3d_bo_address data;
6002     struct wined3d_format format;
6003     POINT dst_point = {0, 0};
6004     CONVERT_TYPES convert;
6005     BYTE *mem;
6006
6007     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6008             && surface_is_offscreen(surface)
6009             && (surface->flags & SFLAG_INDRAWABLE))
6010     {
6011         read_from_framebuffer_texture(surface, srgb);
6012
6013         return WINED3D_OK;
6014     }
6015
6016     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6017             && (surface->resource.format->flags & attach_flags) == attach_flags
6018             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6019                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6020                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6021     {
6022         if (srgb)
6023             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
6024                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6025         else
6026             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
6027                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6028
6029         return WINED3D_OK;
6030     }
6031
6032     /* Upload from system memory */
6033
6034     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6035             TRUE /* We will use textures */, &format, &convert);
6036
6037     if (srgb)
6038     {
6039         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6040         {
6041             /* Performance warning... */
6042             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6043             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6044         }
6045     }
6046     else
6047     {
6048         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6049         {
6050             /* Performance warning... */
6051             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6052             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6053         }
6054     }
6055
6056     if (!(surface->flags & SFLAG_INSYSMEM))
6057     {
6058         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6059         /* Lets hope we get it from somewhere... */
6060         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6061     }
6062
6063     if (!device->isInDraw)
6064         context = context_acquire(device, NULL);
6065
6066     surface_prepare_texture(surface, gl_info, srgb);
6067     surface_bind_and_dirtify(surface, gl_info, srgb);
6068
6069     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6070     {
6071         surface->flags |= SFLAG_GLCKEY;
6072         surface->glCKey = surface->SrcBltCKey;
6073     }
6074     else surface->flags &= ~SFLAG_GLCKEY;
6075
6076     width = surface->resource.width;
6077     src_pitch = wined3d_surface_get_pitch(surface);
6078
6079     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6080      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6081      * called. */
6082     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6083     {
6084         TRACE("Removing the pbo attached to surface %p.\n", surface);
6085         surface_remove_pbo(surface, gl_info);
6086     }
6087
6088     if (format.convert)
6089     {
6090         /* This code is entered for texture formats which need a fixup. */
6091         UINT height = surface->resource.height;
6092
6093         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6094         dst_pitch = width * format.conv_byte_count;
6095         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6096
6097         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6098         {
6099             ERR("Out of memory (%u).\n", dst_pitch * height);
6100             if (context)
6101                 context_release(context);
6102             return E_OUTOFMEMORY;
6103         }
6104         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6105     }
6106     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6107     {
6108         /* This code is only entered for color keying fixups */
6109         UINT height = surface->resource.height;
6110
6111         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6112         dst_pitch = width * format.conv_byte_count;
6113         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6114
6115         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6116         {
6117             ERR("Out of memory (%u).\n", dst_pitch * height);
6118             if (context)
6119                 context_release(context);
6120             return E_OUTOFMEMORY;
6121         }
6122         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6123                 width, height, dst_pitch, convert, surface);
6124     }
6125     else
6126     {
6127         mem = surface->resource.allocatedMemory;
6128     }
6129
6130     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6131     data.addr = mem;
6132     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6133
6134     if (context)
6135         context_release(context);
6136
6137     /* Don't delete PBO memory. */
6138     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6139         HeapFree(GetProcessHeap(), 0, mem);
6140
6141     return WINED3D_OK;
6142 }
6143
6144 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6145 {
6146     struct wined3d_device *device = surface->resource.device;
6147     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6148     BOOL in_fbo = FALSE;
6149     HRESULT hr;
6150
6151     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6152
6153     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6154     {
6155         if (location == SFLAG_INTEXTURE)
6156         {
6157             struct wined3d_context *context = context_acquire(device, NULL);
6158             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6159             context_release(context);
6160             return WINED3D_OK;
6161         }
6162         else
6163         {
6164             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6165             return WINED3DERR_INVALIDCALL;
6166         }
6167     }
6168
6169     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6170     {
6171         if (surface_is_offscreen(surface))
6172         {
6173             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same
6174              * for offscreen targets. Prefer SFLAG_INTEXTURE. */
6175             if (location == SFLAG_INDRAWABLE)
6176                 location = SFLAG_INTEXTURE;
6177             in_fbo = TRUE;
6178         }
6179         else
6180         {
6181             TRACE("Surface %p is an onscreen surface.\n", surface);
6182         }
6183     }
6184
6185     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6186         location = SFLAG_INTEXTURE;
6187
6188     if (surface->flags & location)
6189     {
6190         TRACE("Location already up to date.\n");
6191         return WINED3D_OK;
6192     }
6193
6194     if (WARN_ON(d3d_surface))
6195     {
6196         DWORD required_access = resource_access_from_location(location);
6197         if ((surface->resource.access_flags & required_access) != required_access)
6198             WARN("Operation requires %#x access, but surface only has %#x.\n",
6199                     required_access, surface->resource.access_flags);
6200     }
6201
6202     if (!(surface->flags & SFLAG_LOCATIONS))
6203     {
6204         ERR("Surface %p does not have any up to date location.\n", surface);
6205         surface->flags |= SFLAG_LOST;
6206         return WINED3DERR_DEVICELOST;
6207     }
6208
6209     switch (location)
6210     {
6211         case SFLAG_INSYSMEM:
6212             surface_load_sysmem(surface, gl_info, rect);
6213             break;
6214
6215         case SFLAG_INDRAWABLE:
6216             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6217                 return hr;
6218             break;
6219
6220         case SFLAG_INTEXTURE:
6221         case SFLAG_INSRGBTEX:
6222             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6223                 return hr;
6224             break;
6225
6226         default:
6227             ERR("Don't know how to handle location %#x.\n", location);
6228             break;
6229     }
6230
6231     if (!rect)
6232     {
6233         surface->flags |= location;
6234
6235         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6236             surface_evict_sysmem(surface);
6237     }
6238
6239     if (in_fbo && (surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)))
6240     {
6241         /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
6242         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
6243     }
6244
6245     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6246             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6247     {
6248         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6249     }
6250
6251     return WINED3D_OK;
6252 }
6253
6254 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6255 {
6256     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6257
6258     /* Not on a swapchain - must be offscreen */
6259     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6260
6261     /* The front buffer is always onscreen */
6262     if (surface == swapchain->front_buffer) return FALSE;
6263
6264     /* If the swapchain is rendered to an FBO, the backbuffer is
6265      * offscreen, otherwise onscreen */
6266     return swapchain->render_to_fbo;
6267 }
6268
6269 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Blitter release hook for the ffp blitter; there is nothing to free.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6272
6273 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6274 /* Context activation is done by the caller. */
6275 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6276 {
6277     BYTE table[256][4];
6278     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6279
6280     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6281
6282     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6283     ENTER_GL();
6284     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6285     LEAVE_GL();
6286 }
6287
6288 /* Context activation is done by the caller. */
6289 static HRESULT ffp_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6290 {
6291     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6292
6293     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6294      * else the surface is converted in software at upload time in LoadLocation.
6295      */
6296     if(fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6297         ffp_blit_p8_upload_palette(surface, gl_info);
6298
6299     ENTER_GL();
6300     glEnable(surface->texture_target);
6301     checkGLcall("glEnable(surface->texture_target)");
6302     LEAVE_GL();
6303     return WINED3D_OK;
6304 }
6305
6306 /* Context activation is done by the caller. */
6307 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6308 {
6309     ENTER_GL();
6310     glDisable(GL_TEXTURE_2D);
6311     checkGLcall("glDisable(GL_TEXTURE_2D)");
6312     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6313     {
6314         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6315         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6316     }
6317     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6318     {
6319         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6320         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6321     }
6322     LEAVE_GL();
6323 }
6324
6325 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6326         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6327         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6328 {
6329     enum complex_fixup src_fixup;
6330
6331     switch (blit_op)
6332     {
6333         case WINED3D_BLIT_OP_COLOR_BLIT:
6334             src_fixup = get_complex_fixup(src_format->color_fixup);
6335             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6336             {
6337                 TRACE("Checking support for fixup:\n");
6338                 dump_color_fixup_desc(src_format->color_fixup);
6339             }
6340
6341             if (!is_identity_fixup(dst_format->color_fixup))
6342             {
6343                 TRACE("Destination fixups are not supported\n");
6344                 return FALSE;
6345             }
6346
6347             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6348             {
6349                 TRACE("P8 fixup supported\n");
6350                 return TRUE;
6351             }
6352
6353             /* We only support identity conversions. */
6354             if (is_identity_fixup(src_format->color_fixup))
6355             {
6356                 TRACE("[OK]\n");
6357                 return TRUE;
6358             }
6359
6360             TRACE("[FAILED]\n");
6361             return FALSE;
6362
6363         case WINED3D_BLIT_OP_COLOR_FILL:
6364             if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6365             {
6366                 TRACE("Color fill not supported\n");
6367                 return FALSE;
6368             }
6369
6370             return TRUE;
6371
6372         case WINED3D_BLIT_OP_DEPTH_FILL:
6373             return TRUE;
6374
6375         default:
6376             TRACE("Unsupported blit_op=%d\n", blit_op);
6377             return FALSE;
6378     }
6379 }
6380
6381 /* Do not call while under the GL lock. */
6382 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6383         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6384 {
6385     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6386     struct wined3d_fb_state fb = {&dst_surface, NULL};
6387
6388     return device_clear_render_targets(device, 1, &fb,
6389             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6390 }
6391
6392 /* Do not call while under the GL lock. */
6393 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6394         struct wined3d_surface *surface, const RECT *rect, float depth)
6395 {
6396     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6397     struct wined3d_fb_state fb = {NULL, surface};
6398
6399     return device_clear_render_targets(device, 0, &fb,
6400             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6401 }
6402
6403 const struct blit_shader ffp_blit =  {
6404     ffp_blit_alloc,
6405     ffp_blit_free,
6406     ffp_blit_set,
6407     ffp_blit_unset,
6408     ffp_blit_supported,
6409     ffp_blit_color_fill,
6410     ffp_blit_depth_fill,
6411 };
6412
6413 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6414 {
6415     return WINED3D_OK;
6416 }
6417
/* Release hook of the CPU blitter; empty because cpu_blit_alloc() acquires
 * nothing.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6422
6423 /* Context activation is done by the caller. */
6424 static HRESULT cpu_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6425 {
6426     return WINED3D_OK;
6427 }
6428
/* State teardown hook of the CPU blitter; empty because cpu_blit_set()
 * changes nothing.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6433
6434 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6435         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6436         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6437 {
6438     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6439     {
6440         return TRUE;
6441     }
6442
6443     return FALSE;
6444 }
6445
6446 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6447         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6448         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6449 {
6450     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6451     const struct wined3d_format *src_format, *dst_format;
6452     struct wined3d_surface *orig_src = src_surface;
6453     WINED3DLOCKED_RECT dlock, slock;
6454     HRESULT hr = WINED3D_OK;
6455     const BYTE *sbuf;
6456     RECT xdst,xsrc;
6457     BYTE *dbuf;
6458     int x, y;
6459
6460     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6461             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6462             flags, fx, debug_d3dtexturefiltertype(filter));
6463
6464     xsrc = *src_rect;
6465
6466     if (!src_surface)
6467     {
6468         RECT full_rect;
6469
6470         full_rect.left = 0;
6471         full_rect.top = 0;
6472         full_rect.right = dst_surface->resource.width;
6473         full_rect.bottom = dst_surface->resource.height;
6474         IntersectRect(&xdst, &full_rect, dst_rect);
6475     }
6476     else
6477     {
6478         BOOL clip_horiz, clip_vert;
6479
6480         xdst = *dst_rect;
6481         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6482         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6483
6484         if (clip_vert || clip_horiz)
6485         {
6486             /* Now check if this is a special case or not... */
6487             if ((flags & WINEDDBLT_DDFX)
6488                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6489                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6490             {
6491                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6492                 return WINED3D_OK;
6493             }
6494
6495             if (clip_horiz)
6496             {
6497                 if (xdst.left < 0)
6498                 {
6499                     xsrc.left -= xdst.left;
6500                     xdst.left = 0;
6501                 }
6502                 if (xdst.right > dst_surface->resource.width)
6503                 {
6504                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6505                     xdst.right = (int)dst_surface->resource.width;
6506                 }
6507             }
6508
6509             if (clip_vert)
6510             {
6511                 if (xdst.top < 0)
6512                 {
6513                     xsrc.top -= xdst.top;
6514                     xdst.top = 0;
6515                 }
6516                 if (xdst.bottom > dst_surface->resource.height)
6517                 {
6518                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6519                     xdst.bottom = (int)dst_surface->resource.height;
6520                 }
6521             }
6522
6523             /* And check if after clipping something is still to be done... */
6524             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6525                     || (xdst.left >= (int)dst_surface->resource.width)
6526                     || (xdst.top >= (int)dst_surface->resource.height)
6527                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6528                     || (xsrc.left >= (int)src_surface->resource.width)
6529                     || (xsrc.top >= (int)src_surface->resource.height))
6530             {
6531                 TRACE("Nothing to be done after clipping.\n");
6532                 return WINED3D_OK;
6533             }
6534         }
6535     }
6536
6537     if (src_surface == dst_surface)
6538     {
6539         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6540         slock = dlock;
6541         src_format = dst_surface->resource.format;
6542         dst_format = src_format;
6543     }
6544     else
6545     {
6546         dst_format = dst_surface->resource.format;
6547         if (src_surface)
6548         {
6549             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6550             {
6551                 src_surface = surface_convert_format(src_surface, dst_format->id);
6552                 if (!src_surface)
6553                 {
6554                     /* The conv function writes a FIXME */
6555                     WARN("Cannot convert source surface format to dest format.\n");
6556                     goto release;
6557                 }
6558             }
6559             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6560             src_format = src_surface->resource.format;
6561         }
6562         else
6563         {
6564             src_format = dst_format;
6565         }
6566         if (dst_rect)
6567             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6568         else
6569             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6570     }
6571
6572     bpp = dst_surface->resource.format->byte_count;
6573     srcheight = xsrc.bottom - xsrc.top;
6574     srcwidth = xsrc.right - xsrc.left;
6575     dstheight = xdst.bottom - xdst.top;
6576     dstwidth = xdst.right - xdst.left;
6577     width = (xdst.right - xdst.left) * bpp;
6578
6579     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6580     {
6581         UINT row_block_count;
6582
6583         if (flags || src_surface == dst_surface)
6584         {
6585             FIXME("Only plain blits supported on compressed surfaces.\n");
6586             hr = E_NOTIMPL;
6587             goto release;
6588         }
6589
6590         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6591
6592         if (srcheight != dstheight || srcwidth != dstwidth)
6593         {
6594             WARN("Stretching not supported on compressed surfaces.\n");
6595             hr = WINED3DERR_INVALIDCALL;
6596             goto release;
6597         }
6598
6599         dbuf = dlock.pBits;
6600         sbuf = slock.pBits;
6601
6602         row_block_count = (dstwidth + dst_format->block_width - 1) / dst_format->block_width;
6603         for (y = 0; y < dstheight; y += dst_format->block_height)
6604         {
6605             memcpy(dbuf, sbuf, row_block_count * dst_format->block_byte_count);
6606             dbuf += dlock.Pitch;
6607             sbuf += slock.Pitch;
6608         }
6609
6610         goto release;
6611     }
6612
6613     if (dst_rect && src_surface != dst_surface)
6614         dbuf = dlock.pBits;
6615     else
6616         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6617
6618     /* First, all the 'source-less' blits */
6619     if (flags & WINEDDBLT_COLORFILL)
6620     {
6621         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6622         flags &= ~WINEDDBLT_COLORFILL;
6623     }
6624
6625     if (flags & WINEDDBLT_DEPTHFILL)
6626     {
6627         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6628     }
6629     if (flags & WINEDDBLT_ROP)
6630     {
6631         /* Catch some degenerate cases here. */
6632         switch (fx->dwROP)
6633         {
6634             case BLACKNESS:
6635                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6636                 break;
6637             case 0xAA0029: /* No-op */
6638                 break;
6639             case WHITENESS:
6640                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6641                 break;
6642             case SRCCOPY: /* Well, we do that below? */
6643                 break;
6644             default:
6645                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6646                 goto error;
6647         }
6648         flags &= ~WINEDDBLT_ROP;
6649     }
6650     if (flags & WINEDDBLT_DDROPS)
6651     {
6652         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6653     }
6654     /* Now the 'with source' blits. */
6655     if (src_surface)
6656     {
6657         const BYTE *sbase;
6658         int sx, xinc, sy, yinc;
6659
6660         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6661             goto release;
6662
6663         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6664                 && (srcwidth != dstwidth || srcheight != dstheight))
6665         {
6666             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6667             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6668         }
6669
6670         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6671         xinc = (srcwidth << 16) / dstwidth;
6672         yinc = (srcheight << 16) / dstheight;
6673
6674         if (!flags)
6675         {
6676             /* No effects, we can cheat here. */
6677             if (dstwidth == srcwidth)
6678             {
6679                 if (dstheight == srcheight)
6680                 {
6681                     /* No stretching in either direction. This needs to be as
6682                      * fast as possible. */
6683                     sbuf = sbase;
6684
6685                     /* Check for overlapping surfaces. */
6686                     if (src_surface != dst_surface || xdst.top < xsrc.top
6687                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6688                     {
6689                         /* No overlap, or dst above src, so copy from top downwards. */
6690                         for (y = 0; y < dstheight; ++y)
6691                         {
6692                             memcpy(dbuf, sbuf, width);
6693                             sbuf += slock.Pitch;
6694                             dbuf += dlock.Pitch;
6695                         }
6696                     }
6697                     else if (xdst.top > xsrc.top)
6698                     {
6699                         /* Copy from bottom upwards. */
6700                         sbuf += (slock.Pitch*dstheight);
6701                         dbuf += (dlock.Pitch*dstheight);
6702                         for (y = 0; y < dstheight; ++y)
6703                         {
6704                             sbuf -= slock.Pitch;
6705                             dbuf -= dlock.Pitch;
6706                             memcpy(dbuf, sbuf, width);
6707                         }
6708                     }
6709                     else
6710                     {
6711                         /* Src and dst overlapping on the same line, use memmove. */
6712                         for (y = 0; y < dstheight; ++y)
6713                         {
6714                             memmove(dbuf, sbuf, width);
6715                             sbuf += slock.Pitch;
6716                             dbuf += dlock.Pitch;
6717                         }
6718                     }
6719                 }
6720                 else
6721                 {
6722                     /* Stretching in y direction only. */
6723                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6724                     {
6725                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6726                         memcpy(dbuf, sbuf, width);
6727                         dbuf += dlock.Pitch;
6728                     }
6729                 }
6730             }
6731             else
6732             {
6733                 /* Stretching in X direction. */
6734                 int last_sy = -1;
6735                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6736                 {
6737                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6738
6739                     if ((sy >> 16) == (last_sy >> 16))
6740                     {
6741                         /* This source row is the same as last source row -
6742                          * Copy the already stretched row. */
6743                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6744                     }
6745                     else
6746                     {
6747 #define STRETCH_ROW(type) \
6748 do { \
6749     const type *s = (const type *)sbuf; \
6750     type *d = (type *)dbuf; \
6751     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6752         d[x] = s[sx >> 16]; \
6753 } while(0)
6754
6755                         switch(bpp)
6756                         {
6757                             case 1:
6758                                 STRETCH_ROW(BYTE);
6759                                 break;
6760                             case 2:
6761                                 STRETCH_ROW(WORD);
6762                                 break;
6763                             case 4:
6764                                 STRETCH_ROW(DWORD);
6765                                 break;
6766                             case 3:
6767                             {
6768                                 const BYTE *s;
6769                                 BYTE *d = dbuf;
6770                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6771                                 {
6772                                     DWORD pixel;
6773
6774                                     s = sbuf + 3 * (sx >> 16);
6775                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6776                                     d[0] = (pixel      ) & 0xff;
6777                                     d[1] = (pixel >>  8) & 0xff;
6778                                     d[2] = (pixel >> 16) & 0xff;
6779                                     d += 3;
6780                                 }
6781                                 break;
6782                             }
6783                             default:
6784                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6785                                 hr = WINED3DERR_NOTAVAILABLE;
6786                                 goto error;
6787                         }
6788 #undef STRETCH_ROW
6789                     }
6790                     dbuf += dlock.Pitch;
6791                     last_sy = sy;
6792                 }
6793             }
6794         }
6795         else
6796         {
6797             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6798             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6799             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6800             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6801             {
6802                 /* The color keying flags are checked for correctness in ddraw */
6803                 if (flags & WINEDDBLT_KEYSRC)
6804                 {
6805                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6806                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6807                 }
6808                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6809                 {
6810                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6811                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6812                 }
6813
6814                 if (flags & WINEDDBLT_KEYDEST)
6815                 {
6816                     /* Destination color keys are taken from the source surface! */
6817                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6818                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6819                 }
6820                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6821                 {
6822                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6823                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6824                 }
6825
6826                 if (bpp == 1)
6827                 {
6828                     keymask = 0xff;
6829                 }
6830                 else
6831                 {
6832                     keymask = src_format->red_mask
6833                             | src_format->green_mask
6834                             | src_format->blue_mask;
6835                 }
6836                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6837             }
6838
6839             if (flags & WINEDDBLT_DDFX)
6840             {
6841                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6842                 LONG tmpxy;
6843                 dTopLeft     = dbuf;
6844                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6845                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6846                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6847
6848                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6849                 {
6850                     /* I don't think we need to do anything about this flag */
6851                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6852                 }
6853                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6854                 {
6855                     tmp          = dTopRight;
6856                     dTopRight    = dTopLeft;
6857                     dTopLeft     = tmp;
6858                     tmp          = dBottomRight;
6859                     dBottomRight = dBottomLeft;
6860                     dBottomLeft  = tmp;
6861                     dstxinc = dstxinc * -1;
6862                 }
6863                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6864                 {
6865                     tmp          = dTopLeft;
6866                     dTopLeft     = dBottomLeft;
6867                     dBottomLeft  = tmp;
6868                     tmp          = dTopRight;
6869                     dTopRight    = dBottomRight;
6870                     dBottomRight = tmp;
6871                     dstyinc = dstyinc * -1;
6872                 }
6873                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6874                 {
6875                     /* I don't think we need to do anything about this flag */
6876                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6877                 }
6878                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6879                 {
6880                     tmp          = dBottomRight;
6881                     dBottomRight = dTopLeft;
6882                     dTopLeft     = tmp;
6883                     tmp          = dBottomLeft;
6884                     dBottomLeft  = dTopRight;
6885                     dTopRight    = tmp;
6886                     dstxinc = dstxinc * -1;
6887                     dstyinc = dstyinc * -1;
6888                 }
6889                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6890                 {
6891                     tmp          = dTopLeft;
6892                     dTopLeft     = dBottomLeft;
6893                     dBottomLeft  = dBottomRight;
6894                     dBottomRight = dTopRight;
6895                     dTopRight    = tmp;
6896                     tmpxy   = dstxinc;
6897                     dstxinc = dstyinc;
6898                     dstyinc = tmpxy;
6899                     dstxinc = dstxinc * -1;
6900                 }
6901                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6902                 {
6903                     tmp          = dTopLeft;
6904                     dTopLeft     = dTopRight;
6905                     dTopRight    = dBottomRight;
6906                     dBottomRight = dBottomLeft;
6907                     dBottomLeft  = tmp;
6908                     tmpxy   = dstxinc;
6909                     dstxinc = dstyinc;
6910                     dstyinc = tmpxy;
6911                     dstyinc = dstyinc * -1;
6912                 }
6913                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6914                 {
6915                     /* I don't think we need to do anything about this flag */
6916                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6917                 }
6918                 dbuf = dTopLeft;
6919                 flags &= ~(WINEDDBLT_DDFX);
6920             }
6921
6922 #define COPY_COLORKEY_FX(type) \
6923 do { \
6924     const type *s; \
6925     type *d = (type *)dbuf, *dx, tmp; \
6926     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6927     { \
6928         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6929         dx = d; \
6930         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6931         { \
6932             tmp = s[sx >> 16]; \
6933             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6934                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6935             { \
6936                 dx[0] = tmp; \
6937             } \
6938             dx = (type *)(((BYTE *)dx) + dstxinc); \
6939         } \
6940         d = (type *)(((BYTE *)d) + dstyinc); \
6941     } \
6942 } while(0)
6943
6944             switch (bpp)
6945             {
6946                 case 1:
6947                     COPY_COLORKEY_FX(BYTE);
6948                     break;
6949                 case 2:
6950                     COPY_COLORKEY_FX(WORD);
6951                     break;
6952                 case 4:
6953                     COPY_COLORKEY_FX(DWORD);
6954                     break;
6955                 case 3:
6956                 {
6957                     const BYTE *s;
6958                     BYTE *d = dbuf, *dx;
6959                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6960                     {
6961                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6962                         dx = d;
6963                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
6964                         {
6965                             DWORD pixel, dpixel = 0;
6966                             s = sbuf + 3 * (sx>>16);
6967                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6968                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
6969                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
6970                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
6971                             {
6972                                 dx[0] = (pixel      ) & 0xff;
6973                                 dx[1] = (pixel >>  8) & 0xff;
6974                                 dx[2] = (pixel >> 16) & 0xff;
6975                             }
6976                             dx += dstxinc;
6977                         }
6978                         d += dstyinc;
6979                     }
6980                     break;
6981                 }
6982                 default:
6983                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
6984                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
6985                     hr = WINED3DERR_NOTAVAILABLE;
6986                     goto error;
6987 #undef COPY_COLORKEY_FX
6988             }
6989         }
6990     }
6991
6992 error:
6993     if (flags && FIXME_ON(d3d_surface))
6994     {
6995         FIXME("\tUnsupported flags: %#x.\n", flags);
6996     }
6997
6998 release:
6999     wined3d_surface_unmap(dst_surface);
7000     if (src_surface && src_surface != dst_surface)
7001         wined3d_surface_unmap(src_surface);
7002     /* Release the converted surface, if any. */
7003     if (src_surface && src_surface != orig_src)
7004         wined3d_surface_decref(src_surface);
7005
7006     return hr;
7007 }
7008
7009 /* Do not call while under the GL lock. */
7010 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7011         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7012 {
7013     WINEDDBLTFX BltFx;
7014
7015     memset(&BltFx, 0, sizeof(BltFx));
7016     BltFx.dwSize = sizeof(BltFx);
7017     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7018     return wined3d_surface_blt(dst_surface, dst_rect, NULL, NULL,
7019             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7020 }
7021
7022 /* Do not call while under the GL lock. */
7023 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7024         struct wined3d_surface *surface, const RECT *rect, float depth)
7025 {
7026     FIXME("Depth filling not implemented by cpu_blit.\n");
7027     return WINED3DERR_INVALIDCALL;
7028 }
7029
7030 const struct blit_shader cpu_blit =  {
7031     cpu_blit_alloc,
7032     cpu_blit_free,
7033     cpu_blit_set,
7034     cpu_blit_unset,
7035     cpu_blit_supported,
7036     cpu_blit_color_fill,
7037     cpu_blit_depth_fill,
7038 };
7039
7040 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7041         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7042         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7043         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7044 {
7045     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7046     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7047     unsigned int resource_size;
7048     HRESULT hr;
7049
7050     if (multisample_quality > 0)
7051     {
7052         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7053         multisample_quality = 0;
7054     }
7055
7056     /* Quick lockable sanity check.
7057      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7058      * this function is too deep to need to care about things like this.
7059      * Levels need to be checked too, since they all affect what can be done. */
7060     switch (pool)
7061     {
7062         case WINED3DPOOL_SCRATCH:
7063             if (!lockable)
7064             {
7065                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7066                         "which are mutually exclusive, setting lockable to TRUE.\n");
7067                 lockable = TRUE;
7068             }
7069             break;
7070
7071         case WINED3DPOOL_SYSTEMMEM:
7072             if (!lockable)
7073                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7074             break;
7075
7076         case WINED3DPOOL_MANAGED:
7077             if (usage & WINED3DUSAGE_DYNAMIC)
7078                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7079             break;
7080
7081         case WINED3DPOOL_DEFAULT:
7082             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7083                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7084             break;
7085
7086         default:
7087             FIXME("Unknown pool %#x.\n", pool);
7088             break;
7089     };
7090
7091     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7092         FIXME("Trying to create a render target that isn't in the default pool.\n");
7093
7094     /* FIXME: Check that the format is supported by the device. */
7095
7096     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7097     if (!resource_size)
7098         return WINED3DERR_INVALIDCALL;
7099
7100     surface->surface_type = surface_type;
7101
7102     switch (surface_type)
7103     {
7104         case SURFACE_OPENGL:
7105             surface->surface_ops = &surface_ops;
7106             break;
7107
7108         case SURFACE_GDI:
7109             surface->surface_ops = &gdi_surface_ops;
7110             break;
7111
7112         default:
7113             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7114             return WINED3DERR_INVALIDCALL;
7115     }
7116
7117     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7118             multisample_type, multisample_quality, usage, pool, width, height, 1,
7119             resource_size, parent, parent_ops, &surface_resource_ops);
7120     if (FAILED(hr))
7121     {
7122         WARN("Failed to initialize resource, returning %#x.\n", hr);
7123         return hr;
7124     }
7125
7126     /* "Standalone" surface. */
7127     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7128
7129     surface->texture_level = level;
7130     list_init(&surface->overlays);
7131
7132     /* Flags */
7133     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7134     if (discard)
7135         surface->flags |= SFLAG_DISCARD;
7136     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7137         surface->flags |= SFLAG_LOCKABLE;
7138     /* I'm not sure if this qualifies as a hack or as an optimization. It
7139      * seems reasonable to assume that lockable render targets will get
7140      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7141      * creation. However, the other reason we want to do this is that several
7142      * ddraw applications access surface memory while the surface isn't
7143      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7144      * future locks prevents these from crashing. */
7145     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7146         surface->flags |= SFLAG_DYNLOCK;
7147
7148     /* Mark the texture as dirty so that it gets loaded first time around. */
7149     surface_add_dirty_rect(surface, NULL);
7150     list_init(&surface->renderbuffers);
7151
7152     TRACE("surface %p, memory %p, size %u\n",
7153             surface, surface->resource.allocatedMemory, surface->resource.size);
7154
7155     /* Call the private setup routine */
7156     hr = surface->surface_ops->surface_private_setup(surface);
7157     if (FAILED(hr))
7158     {
7159         ERR("Private setup failed, returning %#x\n", hr);
7160         surface->surface_ops->surface_cleanup(surface);
7161         return hr;
7162     }
7163
7164     return hr;
7165 }
7166
7167 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7168         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7169         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7170         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7171 {
7172     struct wined3d_surface *object;
7173     HRESULT hr;
7174
7175     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7176             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7177     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7178             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7179     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7180
7181     if (surface_type == SURFACE_OPENGL && !device->adapter)
7182     {
7183         ERR("OpenGL surfaces are not available without OpenGL.\n");
7184         return WINED3DERR_NOTAVAILABLE;
7185     }
7186
7187     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7188     if (!object)
7189     {
7190         ERR("Failed to allocate surface memory.\n");
7191         return WINED3DERR_OUTOFVIDEOMEMORY;
7192     }
7193
7194     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7195             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7196     if (FAILED(hr))
7197     {
7198         WARN("Failed to initialize surface, returning %#x.\n", hr);
7199         HeapFree(GetProcessHeap(), 0, object);
7200         return hr;
7201     }
7202
7203     TRACE("Created surface %p.\n", object);
7204     *surface = object;
7205
7206     return hr;
7207 }