wined3d: Allow color fills on FBO attachable surfaces in ffp_blit_supported().
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     TRACE("surface %p.\n", surface);
46
47     if (surface->texture_name || (surface->flags & SFLAG_PBO) || !list_empty(&surface->renderbuffers))
48     {
49         struct wined3d_renderbuffer_entry *entry, *entry2;
50         const struct wined3d_gl_info *gl_info;
51         struct wined3d_context *context;
52
53         context = context_acquire(surface->resource.device, NULL);
54         gl_info = context->gl_info;
55
56         ENTER_GL();
57
58         if (surface->texture_name)
59         {
60             TRACE("Deleting texture %u.\n", surface->texture_name);
61             glDeleteTextures(1, &surface->texture_name);
62         }
63
64         if (surface->flags & SFLAG_PBO)
65         {
66             TRACE("Deleting PBO %u.\n", surface->pbo);
67             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
68         }
69
70         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
71         {
72             TRACE("Deleting renderbuffer %u.\n", entry->id);
73             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
74             HeapFree(GetProcessHeap(), 0, entry);
75         }
76
77         LEAVE_GL();
78
79         context_release(context);
80     }
81
82     if (surface->flags & SFLAG_DIBSECTION)
83     {
84         /* Release the DC. */
85         SelectObject(surface->hDC, surface->dib.holdbitmap);
86         DeleteDC(surface->hDC);
87         /* Release the DIB section. */
88         DeleteObject(surface->dib.DIBsection);
89         surface->dib.bitmap_data = NULL;
90         surface->resource.allocatedMemory = NULL;
91     }
92
93     if (surface->flags & SFLAG_USERPTR)
94         wined3d_surface_set_mem(surface, NULL);
95     if (surface->overlay_dest)
96         list_remove(&surface->overlay_entry);
97
98     HeapFree(GetProcessHeap(), 0, surface->palette9);
99
100     resource_cleanup(&surface->resource);
101 }
102
103 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
104 {
105     TRACE("surface %p, container %p.\n", surface, container);
106
107     if (!container && type != WINED3D_CONTAINER_NONE)
108         ERR("Setting NULL container of type %#x.\n", type);
109
110     if (type == WINED3D_CONTAINER_SWAPCHAIN)
111     {
112         surface->get_drawable_size = get_drawable_size_swapchain;
113     }
114     else
115     {
116         switch (wined3d_settings.offscreen_rendering_mode)
117         {
118             case ORM_FBO:
119                 surface->get_drawable_size = get_drawable_size_fbo;
120                 break;
121
122             case ORM_BACKBUFFER:
123                 surface->get_drawable_size = get_drawable_size_backbuffer;
124                 break;
125
126             default:
127                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
128                 return;
129         }
130     }
131
132     surface->container.type = type;
133     surface->container.u.base = container;
134 }
135
136 struct blt_info
137 {
138     GLenum binding;
139     GLenum bind_target;
140     enum tex_types tex_type;
141     GLfloat coords[4][3];
142 };
143
/* Floating point rectangle: left, top, right and bottom edges, in that
 * order. cube_coords_float() fills one of these in. */
struct float_rect
{
    float l, t, r, b;
};
151
152 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
153 {
154     f->l = ((r->left * 2.0f) / w) - 1.0f;
155     f->t = ((r->top * 2.0f) / h) - 1.0f;
156     f->r = ((r->right * 2.0f) / w) - 1.0f;
157     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
158 }
159
160 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
161 {
162     GLfloat (*coords)[3] = info->coords;
163     struct float_rect f;
164
165     switch (target)
166     {
167         default:
168             FIXME("Unsupported texture target %#x\n", target);
169             /* Fall back to GL_TEXTURE_2D */
170         case GL_TEXTURE_2D:
171             info->binding = GL_TEXTURE_BINDING_2D;
172             info->bind_target = GL_TEXTURE_2D;
173             info->tex_type = tex_2d;
174             coords[0][0] = (float)rect->left / w;
175             coords[0][1] = (float)rect->top / h;
176             coords[0][2] = 0.0f;
177
178             coords[1][0] = (float)rect->right / w;
179             coords[1][1] = (float)rect->top / h;
180             coords[1][2] = 0.0f;
181
182             coords[2][0] = (float)rect->left / w;
183             coords[2][1] = (float)rect->bottom / h;
184             coords[2][2] = 0.0f;
185
186             coords[3][0] = (float)rect->right / w;
187             coords[3][1] = (float)rect->bottom / h;
188             coords[3][2] = 0.0f;
189             break;
190
191         case GL_TEXTURE_RECTANGLE_ARB:
192             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
193             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
194             info->tex_type = tex_rect;
195             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
196             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
197             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
198             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
199             break;
200
201         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
202             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
203             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
204             info->tex_type = tex_cube;
205             cube_coords_float(rect, w, h, &f);
206
207             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
208             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
209             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
210             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
211             break;
212
213         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
214             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
215             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
216             info->tex_type = tex_cube;
217             cube_coords_float(rect, w, h, &f);
218
219             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
220             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
221             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
222             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
223             break;
224
225         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
226             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
227             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
228             info->tex_type = tex_cube;
229             cube_coords_float(rect, w, h, &f);
230
231             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
232             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
233             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
234             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
235             break;
236
237         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
238             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
239             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
240             info->tex_type = tex_cube;
241             cube_coords_float(rect, w, h, &f);
242
243             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
244             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
245             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
246             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
247             break;
248
249         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
250             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
251             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
252             info->tex_type = tex_cube;
253             cube_coords_float(rect, w, h, &f);
254
255             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
256             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
257             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
258             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
259             break;
260
261         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
262             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
263             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
264             info->tex_type = tex_cube;
265             cube_coords_float(rect, w, h, &f);
266
267             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
268             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
269             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
270             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
271             break;
272     }
273 }
274
275 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
276 {
277     if (rect_in)
278         *rect_out = *rect_in;
279     else
280     {
281         rect_out->left = 0;
282         rect_out->top = 0;
283         rect_out->right = surface->resource.width;
284         rect_out->bottom = surface->resource.height;
285     }
286 }
287
288 /* GL locking and context activation is done by the caller */
289 void draw_textured_quad(const struct wined3d_surface *src_surface, const RECT *src_rect,
290         const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
291 {
292     struct blt_info info;
293
294     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
295
296     glEnable(info.bind_target);
297     checkGLcall("glEnable(bind_target)");
298
299     /* Bind the texture */
300     glBindTexture(info.bind_target, src_surface->texture_name);
301     checkGLcall("glBindTexture");
302
303     /* Filtering for StretchRect */
304     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
305             wined3d_gl_mag_filter(magLookup, Filter));
306     checkGLcall("glTexParameteri");
307     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
308             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
309     checkGLcall("glTexParameteri");
310     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
311     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
312     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
313     checkGLcall("glTexEnvi");
314
315     /* Draw a quad */
316     glBegin(GL_TRIANGLE_STRIP);
317     glTexCoord3fv(info.coords[0]);
318     glVertex2i(dst_rect->left, dst_rect->top);
319
320     glTexCoord3fv(info.coords[1]);
321     glVertex2i(dst_rect->right, dst_rect->top);
322
323     glTexCoord3fv(info.coords[2]);
324     glVertex2i(dst_rect->left, dst_rect->bottom);
325
326     glTexCoord3fv(info.coords[3]);
327     glVertex2i(dst_rect->right, dst_rect->bottom);
328     glEnd();
329
330     /* Unbind the texture */
331     glBindTexture(info.bind_target, 0);
332     checkGLcall("glBindTexture(info->bind_target, 0)");
333
334     /* We changed the filtering settings on the texture. Inform the
335      * container about this to get the filters reset properly next draw. */
336     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
337     {
338         struct wined3d_texture *texture = src_surface->container.u.texture;
339         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
340         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
341         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
342     }
343 }
344
345 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
346 {
347     const struct wined3d_format *format = surface->resource.format;
348     SYSTEM_INFO sysInfo;
349     BITMAPINFO *b_info;
350     int extraline = 0;
351     DWORD *masks;
352     UINT usage;
353     HDC dc;
354
355     TRACE("surface %p.\n", surface);
356
357     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
358     {
359         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
360         return WINED3DERR_INVALIDCALL;
361     }
362
363     switch (format->byte_count)
364     {
365         case 2:
366         case 4:
367             /* Allocate extra space to store the RGB bit masks. */
368             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
369             break;
370
371         case 3:
372             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
373             break;
374
375         default:
376             /* Allocate extra space for a palette. */
377             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
378                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
379             break;
380     }
381
382     if (!b_info)
383         return E_OUTOFMEMORY;
384
385     /* Some applications access the surface in via DWORDs, and do not take
386      * the necessary care at the end of the surface. So we need at least
387      * 4 extra bytes at the end of the surface. Check against the page size,
388      * if the last page used for the surface has at least 4 spare bytes we're
389      * safe, otherwise add an extra line to the DIB section. */
390     GetSystemInfo(&sysInfo);
391     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
392     {
393         extraline = 1;
394         TRACE("Adding an extra line to the DIB section.\n");
395     }
396
397     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
398     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
399     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
400     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
401     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
402             * wined3d_surface_get_pitch(surface);
403     b_info->bmiHeader.biPlanes = 1;
404     b_info->bmiHeader.biBitCount = format->byte_count * 8;
405
406     b_info->bmiHeader.biXPelsPerMeter = 0;
407     b_info->bmiHeader.biYPelsPerMeter = 0;
408     b_info->bmiHeader.biClrUsed = 0;
409     b_info->bmiHeader.biClrImportant = 0;
410
411     /* Get the bit masks */
412     masks = (DWORD *)b_info->bmiColors;
413     switch (surface->resource.format->id)
414     {
415         case WINED3DFMT_B8G8R8_UNORM:
416             usage = DIB_RGB_COLORS;
417             b_info->bmiHeader.biCompression = BI_RGB;
418             break;
419
420         case WINED3DFMT_B5G5R5X1_UNORM:
421         case WINED3DFMT_B5G5R5A1_UNORM:
422         case WINED3DFMT_B4G4R4A4_UNORM:
423         case WINED3DFMT_B4G4R4X4_UNORM:
424         case WINED3DFMT_B2G3R3_UNORM:
425         case WINED3DFMT_B2G3R3A8_UNORM:
426         case WINED3DFMT_R10G10B10A2_UNORM:
427         case WINED3DFMT_R8G8B8A8_UNORM:
428         case WINED3DFMT_R8G8B8X8_UNORM:
429         case WINED3DFMT_B10G10R10A2_UNORM:
430         case WINED3DFMT_B5G6R5_UNORM:
431         case WINED3DFMT_R16G16B16A16_UNORM:
432             usage = 0;
433             b_info->bmiHeader.biCompression = BI_BITFIELDS;
434             masks[0] = format->red_mask;
435             masks[1] = format->green_mask;
436             masks[2] = format->blue_mask;
437             break;
438
439         default:
440             /* Don't know palette */
441             b_info->bmiHeader.biCompression = BI_RGB;
442             usage = 0;
443             break;
444     }
445
446     if (!(dc = GetDC(0)))
447     {
448         HeapFree(GetProcessHeap(), 0, b_info);
449         return HRESULT_FROM_WIN32(GetLastError());
450     }
451
452     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
453             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
454             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
455     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
456     ReleaseDC(0, dc);
457
458     if (!surface->dib.DIBsection)
459     {
460         ERR("Failed to create DIB section.\n");
461         HeapFree(GetProcessHeap(), 0, b_info);
462         return HRESULT_FROM_WIN32(GetLastError());
463     }
464
465     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
466     /* Copy the existing surface to the dib section. */
467     if (surface->resource.allocatedMemory)
468     {
469         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
470                 surface->resource.height * wined3d_surface_get_pitch(surface));
471     }
472     else
473     {
474         /* This is to make maps read the GL texture although memory is allocated. */
475         surface->flags &= ~SFLAG_INSYSMEM;
476     }
477     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
478
479     HeapFree(GetProcessHeap(), 0, b_info);
480
481     /* Now allocate a DC. */
482     surface->hDC = CreateCompatibleDC(0);
483     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
484     TRACE("Using wined3d palette %p.\n", surface->palette);
485     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
486
487     surface->flags |= SFLAG_DIBSECTION;
488
489     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
490     surface->resource.heapMemory = NULL;
491
492     return WINED3D_OK;
493 }
494
495 static void surface_prepare_system_memory(struct wined3d_surface *surface)
496 {
497     struct wined3d_device *device = surface->resource.device;
498     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
499
500     TRACE("surface %p.\n", surface);
501
502     /* Performance optimization: Count how often a surface is locked, if it is
503      * locked regularly do not throw away the system memory copy. This avoids
504      * the need to download the surface from OpenGL all the time. The surface
505      * is still downloaded if the OpenGL texture is changed. */
506     if (!(surface->flags & SFLAG_DYNLOCK))
507     {
508         if (++surface->lockCount > MAXLOCKCOUNT)
509         {
510             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
511             surface->flags |= SFLAG_DYNLOCK;
512         }
513     }
514
515     /* Create a PBO for dynamically locked surfaces but don't do it for
516      * converted or NPOT surfaces. Also don't create a PBO for systemmem
517      * surfaces. */
518     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
519             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
520             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
521     {
522         struct wined3d_context *context;
523         GLenum error;
524
525         context = context_acquire(device, NULL);
526         ENTER_GL();
527
528         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
529         error = glGetError();
530         if (!surface->pbo || error != GL_NO_ERROR)
531             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
532
533         TRACE("Binding PBO %u.\n", surface->pbo);
534
535         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
536         checkGLcall("glBindBufferARB");
537
538         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
539                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
540         checkGLcall("glBufferDataARB");
541
542         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
543         checkGLcall("glBindBufferARB");
544
545         /* We don't need the system memory anymore and we can't even use it for PBOs. */
546         if (!(surface->flags & SFLAG_CLIENT))
547         {
548             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
549             surface->resource.heapMemory = NULL;
550         }
551         surface->resource.allocatedMemory = NULL;
552         surface->flags |= SFLAG_PBO;
553         LEAVE_GL();
554         context_release(context);
555     }
556     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
557     {
558         /* Whatever surface we have, make sure that there is memory allocated
559          * for the downloaded copy, or a PBO to map. */
560         if (!surface->resource.heapMemory)
561             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
562
563         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
564                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
565
566         if (surface->flags & SFLAG_INSYSMEM)
567             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
568     }
569 }
570
571 static void surface_evict_sysmem(struct wined3d_surface *surface)
572 {
573     if (surface->flags & SFLAG_DONOTFREE)
574         return;
575
576     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
577     surface->resource.allocatedMemory = NULL;
578     surface->resource.heapMemory = NULL;
579     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
580 }
581
582 /* Context activation is done by the caller. */
583 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
584         const struct wined3d_gl_info *gl_info, BOOL srgb)
585 {
586     struct wined3d_device *device = surface->resource.device;
587     DWORD active_sampler;
588     GLint active_texture;
589
590     /* We don't need a specific texture unit, but after binding the texture
591      * the current unit is dirty. Read the unit back instead of switching to
592      * 0, this avoids messing around with the state manager's GL states. The
593      * current texture unit should always be a valid one.
594      *
595      * To be more specific, this is tricky because we can implicitly be
596      * called from sampler() in state.c. This means we can't touch anything
597      * other than whatever happens to be the currently active texture, or we
598      * would risk marking already applied sampler states dirty again.
599      *
600      * TODO: Track the current active texture per GL context instead of using
601      * glGet(). */
602
603     ENTER_GL();
604     glGetIntegerv(GL_ACTIVE_TEXTURE, &active_texture);
605     LEAVE_GL();
606     active_sampler = device->rev_tex_unit_map[active_texture - GL_TEXTURE0_ARB];
607
608     if (active_sampler != WINED3D_UNMAPPED_STAGE)
609         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
610     surface_bind(surface, gl_info, srgb);
611 }
612
613 static void surface_force_reload(struct wined3d_surface *surface)
614 {
615     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
616 }
617
618 static void surface_release_client_storage(struct wined3d_surface *surface)
619 {
620     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
621
622     ENTER_GL();
623     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
624     if (surface->texture_name)
625     {
626         surface_bind_and_dirtify(surface, context->gl_info, FALSE);
627         glTexImage2D(surface->texture_target, surface->texture_level,
628                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
629     }
630     if (surface->texture_name_srgb)
631     {
632         surface_bind_and_dirtify(surface, context->gl_info, TRUE);
633         glTexImage2D(surface->texture_target, surface->texture_level,
634                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
635     }
636     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
637     LEAVE_GL();
638
639     context_release(context);
640
641     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
642     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
643     surface_force_reload(surface);
644 }
645
646 static HRESULT surface_private_setup(struct wined3d_surface *surface)
647 {
648     /* TODO: Check against the maximum texture sizes supported by the video card. */
649     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
650     unsigned int pow2Width, pow2Height;
651
652     TRACE("surface %p.\n", surface);
653
654     surface->texture_name = 0;
655     surface->texture_target = GL_TEXTURE_2D;
656
657     /* Non-power2 support */
658     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
659     {
660         pow2Width = surface->resource.width;
661         pow2Height = surface->resource.height;
662     }
663     else
664     {
665         /* Find the nearest pow2 match */
666         pow2Width = pow2Height = 1;
667         while (pow2Width < surface->resource.width)
668             pow2Width <<= 1;
669         while (pow2Height < surface->resource.height)
670             pow2Height <<= 1;
671     }
672     surface->pow2Width = pow2Width;
673     surface->pow2Height = pow2Height;
674
675     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
676     {
677         /* TODO: Add support for non power two compressed textures. */
678         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
679         {
680             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
681                   surface, surface->resource.width, surface->resource.height);
682             return WINED3DERR_NOTAVAILABLE;
683         }
684     }
685
686     if (pow2Width != surface->resource.width
687             || pow2Height != surface->resource.height)
688     {
689         surface->flags |= SFLAG_NONPOW2;
690     }
691
692     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
693             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
694     {
695         /* One of three options:
696          * 1: Do the same as we do with NPOT and scale the texture, (any
697          *    texture ops would require the texture to be scaled which is
698          *    potentially slow)
699          * 2: Set the texture to the maximum size (bad idea).
700          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
701          * 4: Create the surface, but allow it to be used only for DirectDraw
702          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
703          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
704          *    the render target. */
705         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
706         {
707             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
708             return WINED3DERR_NOTAVAILABLE;
709         }
710
711         /* We should never use this surface in combination with OpenGL! */
712         TRACE("Creating an oversized surface: %ux%u.\n",
713                 surface->pow2Width, surface->pow2Height);
714     }
715     else
716     {
717         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
718          * and EXT_PALETTED_TEXTURE is used in combination with texture
719          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
720          * EXT_PALETTED_TEXTURE doesn't work in combination with
721          * ARB_TEXTURE_RECTANGLE. */
722         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
723                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
724                 && gl_info->supported[EXT_PALETTED_TEXTURE]
725                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
726         {
727             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
728             surface->pow2Width = surface->resource.width;
729             surface->pow2Height = surface->resource.height;
730             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
731         }
732     }
733
734     switch (wined3d_settings.offscreen_rendering_mode)
735     {
736         case ORM_FBO:
737             surface->get_drawable_size = get_drawable_size_fbo;
738             break;
739
740         case ORM_BACKBUFFER:
741             surface->get_drawable_size = get_drawable_size_backbuffer;
742             break;
743
744         default:
745             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
746             return WINED3DERR_INVALIDCALL;
747     }
748
749     surface->flags |= SFLAG_INSYSMEM;
750
751     return WINED3D_OK;
752 }
753
754 static void surface_realize_palette(struct wined3d_surface *surface)
755 {
756     struct wined3d_palette *palette = surface->palette;
757
758     TRACE("surface %p.\n", surface);
759
760     if (!palette) return;
761
762     if (surface->resource.format->id == WINED3DFMT_P8_UINT
763             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
764     {
765         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
766         {
767             /* Make sure the texture is up to date. This call doesn't do
768              * anything if the texture is already up to date. */
769             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
770
771             /* We want to force a palette refresh, so mark the drawable as not being up to date */
772             if (!surface_is_offscreen(surface))
773                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
774         }
775         else
776         {
777             if (!(surface->flags & SFLAG_INSYSMEM))
778             {
779                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
780                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
781             }
782             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
783         }
784     }
785
786     if (surface->flags & SFLAG_DIBSECTION)
787     {
788         RGBQUAD col[256];
789         unsigned int i;
790
791         TRACE("Updating the DC's palette.\n");
792
793         for (i = 0; i < 256; ++i)
794         {
795             col[i].rgbRed   = palette->palents[i].peRed;
796             col[i].rgbGreen = palette->palents[i].peGreen;
797             col[i].rgbBlue  = palette->palents[i].peBlue;
798             col[i].rgbReserved = 0;
799         }
800         SetDIBColorTable(surface->hDC, 0, 256, col);
801     }
802
803     /* Propagate the changes to the drawable when we have a palette. */
804     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
805         surface_load_location(surface, SFLAG_INDRAWABLE, NULL);
806 }
807
808 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
809 {
810     HRESULT hr;
811
812     /* If there's no destination surface there is nothing to do. */
813     if (!surface->overlay_dest)
814         return WINED3D_OK;
815
816     /* Blt calls ModifyLocation on the dest surface, which in turn calls
817      * DrawOverlay to update the overlay. Prevent an endless recursion. */
818     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
819         return WINED3D_OK;
820
821     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
822     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
823             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
824     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
825
826     return hr;
827 }
828
829 static void surface_preload(struct wined3d_surface *surface)
830 {
831     TRACE("surface %p.\n", surface);
832
833     surface_internal_preload(surface, SRGB_ANY);
834 }
835
836 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
837 {
838     struct wined3d_device *device = surface->resource.device;
839     const RECT *pass_rect = rect;
840
841     TRACE("surface %p, rect %s, flags %#x.\n",
842             surface, wine_dbgstr_rect(rect), flags);
843
844     if (flags & WINED3DLOCK_DISCARD)
845     {
846         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
847         surface_prepare_system_memory(surface);
848         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
849     }
850     else
851     {
852         /* surface_load_location() does not check if the rectangle specifies
853          * the full surface. Most callers don't need that, so do it here. */
854         if (rect && !rect->top && !rect->left
855                 && rect->right == surface->resource.width
856                 && rect->bottom == surface->resource.height)
857             pass_rect = NULL;
858
859         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
860                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
861                 || surface == device->fb.render_targets[0])))
862             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
863     }
864
865     if (surface->flags & SFLAG_PBO)
866     {
867         const struct wined3d_gl_info *gl_info;
868         struct wined3d_context *context;
869
870         context = context_acquire(device, NULL);
871         gl_info = context->gl_info;
872
873         ENTER_GL();
874         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
875         checkGLcall("glBindBufferARB");
876
877         /* This shouldn't happen but could occur if some other function
878          * didn't handle the PBO properly. */
879         if (surface->resource.allocatedMemory)
880             ERR("The surface already has PBO memory allocated.\n");
881
882         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
883         checkGLcall("glMapBufferARB");
884
885         /* Make sure the PBO isn't set anymore in order not to break non-PBO
886          * calls. */
887         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
888         checkGLcall("glBindBufferARB");
889
890         LEAVE_GL();
891         context_release(context);
892     }
893
894     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
895     {
896         if (!rect)
897             surface_add_dirty_rect(surface, NULL);
898         else
899         {
900             WINED3DBOX b;
901
902             b.Left = rect->left;
903             b.Top = rect->top;
904             b.Right = rect->right;
905             b.Bottom = rect->bottom;
906             b.Front = 0;
907             b.Back = 1;
908             surface_add_dirty_rect(surface, &b);
909         }
910     }
911 }
912
913 static void surface_unmap(struct wined3d_surface *surface)
914 {
915     struct wined3d_device *device = surface->resource.device;
916     BOOL fullsurface;
917
918     TRACE("surface %p.\n", surface);
919
920     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
921
922     if (surface->flags & SFLAG_PBO)
923     {
924         const struct wined3d_gl_info *gl_info;
925         struct wined3d_context *context;
926
927         TRACE("Freeing PBO memory.\n");
928
929         context = context_acquire(device, NULL);
930         gl_info = context->gl_info;
931
932         ENTER_GL();
933         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
934         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
935         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
936         checkGLcall("glUnmapBufferARB");
937         LEAVE_GL();
938         context_release(context);
939
940         surface->resource.allocatedMemory = NULL;
941     }
942
943     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
944
945     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
946     {
947         TRACE("Not dirtified, nothing to do.\n");
948         goto done;
949     }
950
951     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
952             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
953     {
954         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
955         {
956             static BOOL warned = FALSE;
957             if (!warned)
958             {
959                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
960                 warned = TRUE;
961             }
962             goto done;
963         }
964
965         if (!surface->dirtyRect.left && !surface->dirtyRect.top
966                 && surface->dirtyRect.right == surface->resource.width
967                 && surface->dirtyRect.bottom == surface->resource.height)
968         {
969             fullsurface = TRUE;
970         }
971         else
972         {
973             /* TODO: Proper partial rectangle tracking. */
974             fullsurface = FALSE;
975             surface->flags |= SFLAG_INSYSMEM;
976         }
977
978         surface_load_location(surface, SFLAG_INDRAWABLE, fullsurface ? NULL : &surface->dirtyRect);
979
980         /* Partial rectangle tracking is not commonly implemented, it is only
981          * done for render targets. INSYSMEM was set before to tell
982          * surface_load_location() where to read the rectangle from.
983          * Indrawable is set because all modifications from the partial
984          * sysmem copy are written back to the drawable, thus the surface is
985          * merged again in the drawable. The sysmem copy is not fully up to
986          * date because only a subrectangle was read in Map(). */
987         if (!fullsurface)
988         {
989             surface_modify_location(surface, SFLAG_INDRAWABLE, TRUE);
990             surface_evict_sysmem(surface);
991         }
992
993         surface->dirtyRect.left = surface->resource.width;
994         surface->dirtyRect.top = surface->resource.height;
995         surface->dirtyRect.right = 0;
996         surface->dirtyRect.bottom = 0;
997     }
998     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
999     {
1000         FIXME("Depth / stencil buffer locking is not implemented.\n");
1001     }
1002
1003 done:
1004     /* Overlays have to be redrawn manually after changes with the GL implementation */
1005     if (surface->overlay_dest)
1006         surface->surface_ops->surface_draw_overlay(surface);
1007 }
1008
1009 static HRESULT surface_getdc(struct wined3d_surface *surface)
1010 {
1011     WINED3DLOCKED_RECT lock;
1012     HRESULT hr;
1013
1014     TRACE("surface %p.\n", surface);
1015
1016     /* Create a DIB section if there isn't a dc yet. */
1017     if (!surface->hDC)
1018     {
1019         if (surface->flags & SFLAG_CLIENT)
1020         {
1021             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1022             surface_release_client_storage(surface);
1023         }
1024         hr = surface_create_dib_section(surface);
1025         if (FAILED(hr))
1026             return WINED3DERR_INVALIDCALL;
1027
1028         /* Use the DIB section from now on if we are not using a PBO. */
1029         if (!(surface->flags & SFLAG_PBO))
1030             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1031     }
1032
1033     /* Map the surface. */
1034     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1035     if (FAILED(hr))
1036         ERR("Map failed, hr %#x.\n", hr);
1037
1038     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1039      * activates the allocatedMemory. */
1040     if (surface->flags & SFLAG_PBO)
1041         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->dib.bitmap_size);
1042
1043     return hr;
1044 }
1045
1046 static HRESULT surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1047 {
1048     TRACE("surface %p, override %p.\n", surface, override);
1049
1050     /* Flipping is only supported on render targets and overlays. */
1051     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
1052     {
1053         WARN("Tried to flip a non-render target, non-overlay surface.\n");
1054         return WINEDDERR_NOTFLIPPABLE;
1055     }
1056
1057     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1058     {
1059         flip_surface(surface, override);
1060
1061         /* Update the overlay if it is visible */
1062         if (surface->overlay_dest)
1063             return surface->surface_ops->surface_draw_overlay(surface);
1064         else
1065             return WINED3D_OK;
1066     }
1067
1068     return WINED3D_OK;
1069 }
1070
1071 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1072 {
1073     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1074         return FALSE;
1075     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1076         return FALSE;
1077     return TRUE;
1078 }
1079
1080 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1081         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1082 {
1083     const struct wined3d_gl_info *gl_info;
1084     struct wined3d_context *context;
1085     DWORD src_mask, dst_mask;
1086     GLbitfield gl_mask;
1087
1088     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1089             device, src_surface, wine_dbgstr_rect(src_rect),
1090             dst_surface, wine_dbgstr_rect(dst_rect));
1091
1092     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1093     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1094
1095     if (src_mask != dst_mask)
1096     {
1097         ERR("Incompatible formats %s and %s.\n",
1098                 debug_d3dformat(src_surface->resource.format->id),
1099                 debug_d3dformat(dst_surface->resource.format->id));
1100         return;
1101     }
1102
1103     if (!src_mask)
1104     {
1105         ERR("Not a depth / stencil format: %s.\n",
1106                 debug_d3dformat(src_surface->resource.format->id));
1107         return;
1108     }
1109
1110     gl_mask = 0;
1111     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1112         gl_mask |= GL_DEPTH_BUFFER_BIT;
1113     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1114         gl_mask |= GL_STENCIL_BUFFER_BIT;
1115
1116     /* Make sure the locations are up-to-date. Loading the destination
1117      * surface isn't required if the entire surface is overwritten. */
1118     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1119     if (!surface_is_full_rect(dst_surface, dst_rect))
1120         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1121
1122     context = context_acquire(device, NULL);
1123     if (!context->valid)
1124     {
1125         context_release(context);
1126         WARN("Invalid context, skipping blit.\n");
1127         return;
1128     }
1129
1130     gl_info = context->gl_info;
1131
1132     ENTER_GL();
1133
1134     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1135     glReadBuffer(GL_NONE);
1136     checkGLcall("glReadBuffer()");
1137     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1138
1139     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1140     context_set_draw_buffer(context, GL_NONE);
1141     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1142
1143     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1144     {
1145         glDepthMask(GL_TRUE);
1146         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1147     }
1148     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1149     {
1150         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1151         {
1152             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1153             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1154         }
1155         glStencilMask(~0U);
1156         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1157     }
1158
1159     glDisable(GL_SCISSOR_TEST);
1160     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1161
1162     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1163             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1164     checkGLcall("glBlitFramebuffer()");
1165
1166     LEAVE_GL();
1167
1168     if (wined3d_settings.strict_draw_ordering)
1169         wglFlush(); /* Flush to ensure ordering across contexts. */
1170
1171     context_release(context);
1172 }
1173
1174 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1175         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1176         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1177 {
1178     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1179         return FALSE;
1180
1181     /* Source and/or destination need to be on the GL side */
1182     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1183         return FALSE;
1184
1185     switch (blit_op)
1186     {
1187         case WINED3D_BLIT_OP_COLOR_BLIT:
1188             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1189                 return FALSE;
1190             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1191                 return FALSE;
1192             break;
1193
1194         case WINED3D_BLIT_OP_DEPTH_BLIT:
1195             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1196                 return FALSE;
1197             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1198                 return FALSE;
1199             break;
1200
1201         default:
1202             return FALSE;
1203     }
1204
1205     if (!(src_format->id == dst_format->id
1206             || (is_identity_fixup(src_format->color_fixup)
1207             && is_identity_fixup(dst_format->color_fixup))))
1208         return FALSE;
1209
1210     return TRUE;
1211 }
1212
1213 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1214 {
1215     const struct wined3d_format *format = surface->resource.format;
1216
1217     switch (format->id)
1218     {
1219         case WINED3DFMT_S1_UINT_D15_UNORM:
1220             *float_depth = depth / (float)0x00007fff;
1221             break;
1222
1223         case WINED3DFMT_D16_UNORM:
1224             *float_depth = depth / (float)0x0000ffff;
1225             break;
1226
1227         case WINED3DFMT_D24_UNORM_S8_UINT:
1228         case WINED3DFMT_X8D24_UNORM:
1229             *float_depth = depth / (float)0x00ffffff;
1230             break;
1231
1232         case WINED3DFMT_D32_UNORM:
1233             *float_depth = depth / (float)0xffffffff;
1234             break;
1235
1236         default:
1237             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1238             return FALSE;
1239     }
1240
1241     return TRUE;
1242 }
1243
1244 /* Do not call while under the GL lock. */
1245 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1246 {
1247     const struct wined3d_resource *resource = &surface->resource;
1248     struct wined3d_device *device = resource->device;
1249     const struct blit_shader *blitter;
1250
1251     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1252             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1253     if (!blitter)
1254     {
1255         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1256         return WINED3DERR_INVALIDCALL;
1257     }
1258
1259     return blitter->depth_fill(device, surface, rect, depth);
1260 }
1261
1262 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1263         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1264 {
1265     struct wined3d_device *device = src_surface->resource.device;
1266
1267     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1268             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1269             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1270         return WINED3DERR_INVALIDCALL;
1271
1272     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1273
1274     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1275             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1276     surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
1277
1278     return WINED3D_OK;
1279 }
1280
1281 /* Do not call while under the GL lock. */
1282 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1283         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1284         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1285 {
1286     const struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1287     struct wined3d_device *device = dst_surface->resource.device;
1288     DWORD src_ds_flags, dst_ds_flags;
1289     RECT src_rect, dst_rect;
1290
1291     static const DWORD simple_blit = WINEDDBLT_ASYNC
1292             | WINEDDBLT_COLORFILL
1293             | WINEDDBLT_WAIT
1294             | WINEDDBLT_DEPTHFILL
1295             | WINEDDBLT_DONOTWAIT;
1296
1297     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1298             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1299             flags, fx, debug_d3dtexturefiltertype(filter));
1300     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1301
1302     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1303     {
1304         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1305         return WINEDDERR_SURFACEBUSY;
1306     }
1307
1308     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1309
1310     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1311             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1312             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1313             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1314             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1315     {
1316         /* The destination rect can be out of bounds on the condition
1317          * that a clipper is set for the surface. */
1318         if (dst_surface->clipper)
1319             FIXME("Blit clipping not implemented.\n");
1320         else
1321             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1322         return WINEDDERR_INVALIDRECT;
1323     }
1324
1325     if (src_surface)
1326     {
1327         surface_get_rect(src_surface, src_rect_in, &src_rect);
1328
1329         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1330                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1331                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1332                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1333                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1334         {
1335             WARN("Application gave us bad source rectangle for Blt.\n");
1336             return WINEDDERR_INVALIDRECT;
1337         }
1338     }
1339     else
1340     {
1341         memset(&src_rect, 0, sizeof(src_rect));
1342     }
1343
1344     if (!fx || !(fx->dwDDFX))
1345         flags &= ~WINEDDBLT_DDFX;
1346
1347     if (flags & WINEDDBLT_WAIT)
1348         flags &= ~WINEDDBLT_WAIT;
1349
1350     if (flags & WINEDDBLT_ASYNC)
1351     {
1352         static unsigned int once;
1353
1354         if (!once++)
1355             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1356         flags &= ~WINEDDBLT_ASYNC;
1357     }
1358
1359     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1360     if (flags & WINEDDBLT_DONOTWAIT)
1361     {
1362         static unsigned int once;
1363
1364         if (!once++)
1365             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1366         flags &= ~WINEDDBLT_DONOTWAIT;
1367     }
1368
1369     if (!device->d3d_initialized)
1370     {
1371         WARN("D3D not initialized, using fallback.\n");
1372         goto cpu;
1373     }
1374
1375     if (flags & ~simple_blit)
1376     {
1377         WARN("Using fallback for complex blit (%#x).\n", flags);
1378         goto fallback;
1379     }
1380
1381     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1382         src_swapchain = src_surface->container.u.swapchain;
1383     else
1384         src_swapchain = NULL;
1385
1386     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1387         dst_swapchain = dst_surface->container.u.swapchain;
1388     else
1389         dst_swapchain = NULL;
1390
1391     /* This isn't strictly needed. FBO blits for example could deal with
1392      * cross-swapchain blits by first downloading the source to a texture
1393      * before switching to the destination context. We just have this here to
1394      * not have to deal with the issue, since cross-swapchain blits should be
1395      * rare. */
1396     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1397     {
1398         FIXME("Using fallback for cross-swapchain blit.\n");
1399         goto fallback;
1400     }
1401
1402     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1403     if (src_surface)
1404         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1405     else
1406         src_ds_flags = 0;
1407
1408     if (src_ds_flags || dst_ds_flags)
1409     {
1410         if (flags & WINEDDBLT_DEPTHFILL)
1411         {
1412             float depth;
1413
1414             TRACE("Depth fill.\n");
1415
1416             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1417                 return WINED3DERR_INVALIDCALL;
1418
1419             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1420                 return WINED3D_OK;
1421         }
1422         else
1423         {
1424             /* Accessing depth / stencil surfaces is supposed to fail while in
1425              * a scene, except for fills, which seem to work. */
1426             if (device->inScene)
1427             {
1428                 WARN("Rejecting depth / stencil access while in scene.\n");
1429                 return WINED3DERR_INVALIDCALL;
1430             }
1431
1432             if (src_ds_flags != dst_ds_flags)
1433             {
1434                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1435                 return WINED3DERR_INVALIDCALL;
1436             }
1437
1438             if (src_rect.top || src_rect.left
1439                     || src_rect.bottom != src_surface->resource.height
1440                     || src_rect.right != src_surface->resource.width)
1441             {
1442                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1443                         wine_dbgstr_rect(&src_rect));
1444                 return WINED3DERR_INVALIDCALL;
1445             }
1446
1447             if (dst_rect.top || dst_rect.left
1448                     || dst_rect.bottom != dst_surface->resource.height
1449                     || dst_rect.right != dst_surface->resource.width)
1450             {
1451                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1452                         wine_dbgstr_rect(&src_rect));
1453                 return WINED3DERR_INVALIDCALL;
1454             }
1455
1456             if (src_surface->resource.height != dst_surface->resource.height
1457                     || src_surface->resource.width != dst_surface->resource.width)
1458             {
1459                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1460                 return WINED3DERR_INVALIDCALL;
1461             }
1462
1463             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1464                 return WINED3D_OK;
1465         }
1466     }
1467
1468 fallback:
1469
1470     /* Special cases for render targets. */
1471     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1472             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1473     {
1474         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1475                 src_surface, &src_rect, flags, fx, filter)))
1476             return WINED3D_OK;
1477     }
1478
1479 cpu:
1480
1481     /* For the rest call the X11 surface implementation. For render targets
1482      * this should be implemented OpenGL accelerated in BltOverride, other
1483      * blits are rather rare. */
1484     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1485 }
1486
1487 /* Do not call while under the GL lock. */
1488 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1489         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1490 {
1491     RECT src_rect, dst_rect;
1492     DWORD flags = 0;
1493
1494     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect_in %s, trans %#x.\n",
1495             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1496
1497     surface_get_rect(src_surface, src_rect_in, &src_rect);
1498
1499     dst_rect.left = dst_x;
1500     dst_rect.top = dst_y;
1501     dst_rect.right = dst_x + src_rect.right - src_rect.left;
1502     dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1503
1504     if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1505         flags |= WINEDDBLT_KEYSRC;
1506     if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1507         flags |= WINEDDBLT_KEYDEST;
1508     if (trans & WINEDDBLTFAST_WAIT)
1509         flags |= WINEDDBLT_WAIT;
1510     if (trans & WINEDDBLTFAST_DONOTWAIT)
1511         flags |= WINEDDBLT_DONOTWAIT;
1512
1513     return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT);
1514 }
1515
1516 static HRESULT surface_set_mem(struct wined3d_surface *surface, void *mem)
1517 {
1518     TRACE("surface %p, mem %p.\n", surface, mem);
1519
1520     if (mem && mem != surface->resource.allocatedMemory)
1521     {
1522         void *release = NULL;
1523
1524         /* Do I have to copy the old surface content? */
1525         if (surface->flags & SFLAG_DIBSECTION)
1526         {
1527             SelectObject(surface->hDC, surface->dib.holdbitmap);
1528             DeleteDC(surface->hDC);
1529             /* Release the DIB section. */
1530             DeleteObject(surface->dib.DIBsection);
1531             surface->dib.bitmap_data = NULL;
1532             surface->resource.allocatedMemory = NULL;
1533             surface->hDC = NULL;
1534             surface->flags &= ~SFLAG_DIBSECTION;
1535         }
1536         else if (!(surface->flags & SFLAG_USERPTR))
1537         {
1538             release = surface->resource.heapMemory;
1539             surface->resource.heapMemory = NULL;
1540         }
1541         surface->resource.allocatedMemory = mem;
1542         surface->flags |= SFLAG_USERPTR;
1543
1544         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
1545         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1546
1547         /* For client textures OpenGL has to be notified. */
1548         if (surface->flags & SFLAG_CLIENT)
1549             surface_release_client_storage(surface);
1550
1551         /* Now free the old memory if any. */
1552         HeapFree(GetProcessHeap(), 0, release);
1553     }
1554     else if (surface->flags & SFLAG_USERPTR)
1555     {
1556         /* Map and GetDC will re-create the dib section and allocated memory. */
1557         surface->resource.allocatedMemory = NULL;
1558         /* HeapMemory should be NULL already. */
1559         if (surface->resource.heapMemory)
1560             ERR("User pointer surface has heap memory allocated.\n");
1561         surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
1562
1563         if (surface->flags & SFLAG_CLIENT)
1564             surface_release_client_storage(surface);
1565
1566         surface_prepare_system_memory(surface);
1567         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1568     }
1569
1570     return WINED3D_OK;
1571 }
1572
1573 /* Context activation is done by the caller. */
1574 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1575 {
1576     if (!surface->resource.heapMemory)
1577     {
1578         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1579         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1580                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1581     }
1582
1583     ENTER_GL();
1584     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1585     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1586     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1587             surface->resource.size, surface->resource.allocatedMemory));
1588     checkGLcall("glGetBufferSubDataARB");
1589     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1590     checkGLcall("glDeleteBuffersARB");
1591     LEAVE_GL();
1592
1593     surface->pbo = 0;
1594     surface->flags &= ~SFLAG_PBO;
1595 }
1596
1597 /* Do not call while under the GL lock. */
1598 static void surface_unload(struct wined3d_resource *resource)
1599 {
1600     struct wined3d_surface *surface = surface_from_resource(resource);
1601     struct wined3d_renderbuffer_entry *entry, *entry2;
1602     struct wined3d_device *device = resource->device;
1603     const struct wined3d_gl_info *gl_info;
1604     struct wined3d_context *context;
1605
1606     TRACE("surface %p.\n", surface);
1607
1608     if (resource->pool == WINED3DPOOL_DEFAULT)
1609     {
1610         /* Default pool resources are supposed to be destroyed before Reset is called.
1611          * Implicit resources stay however. So this means we have an implicit render target
1612          * or depth stencil. The content may be destroyed, but we still have to tear down
1613          * opengl resources, so we cannot leave early.
1614          *
1615          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1616          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1617          * or the depth stencil into an FBO the texture or render buffer will be removed
1618          * and all flags get lost
1619          */
1620         surface_init_sysmem(surface);
1621     }
1622     else
1623     {
1624         /* Load the surface into system memory */
1625         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1626         surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
1627     }
1628     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1629     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1630     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1631
1632     context = context_acquire(device, NULL);
1633     gl_info = context->gl_info;
1634
1635     /* Destroy PBOs, but load them into real sysmem before */
1636     if (surface->flags & SFLAG_PBO)
1637         surface_remove_pbo(surface, gl_info);
1638
1639     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1640      * all application-created targets the application has to release the surface
1641      * before calling _Reset
1642      */
1643     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1644     {
1645         ENTER_GL();
1646         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1647         LEAVE_GL();
1648         list_remove(&entry->entry);
1649         HeapFree(GetProcessHeap(), 0, entry);
1650     }
1651     list_init(&surface->renderbuffers);
1652     surface->current_renderbuffer = NULL;
1653
1654     /* If we're in a texture, the texture name belongs to the texture.
1655      * Otherwise, destroy it. */
1656     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1657     {
1658         ENTER_GL();
1659         glDeleteTextures(1, &surface->texture_name);
1660         surface->texture_name = 0;
1661         glDeleteTextures(1, &surface->texture_name_srgb);
1662         surface->texture_name_srgb = 0;
1663         LEAVE_GL();
1664     }
1665
1666     context_release(context);
1667
1668     resource_unload(resource);
1669 }
1670
/* Resource operations for surfaces. surface_unload() is the only callback
 * installed by this table. */
static const struct wined3d_resource_ops surface_resource_ops =
{
    surface_unload,
};
1675
/* Surface operation table built from the surface_*() implementations.
 *
 * The initialisers are positional, so their order has to match the member
 * order of struct wined3d_surface_ops, which is declared outside this file.
 * gdi_surface_ops lists its counterparts in the same order. */
static const struct wined3d_surface_ops surface_ops =
{
    surface_private_setup,
    surface_cleanup,
    surface_realize_palette,
    surface_draw_overlay,
    surface_preload,
    surface_map,
    surface_unmap,
    surface_getdc,
    surface_flip,
    surface_set_mem,
};
1689
1690 /*****************************************************************************
1691  * Initializes the GDI surface, aka creates the DIB section we render to
1692  * The DIB section creation is done by calling GetDC, which will create the
1693  * section and releasing the dc to allow the app to use it. The dib section
1694  * will stay until the surface is released
1695  *
1696  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1697  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1698  * avoid confusion in the shared surface code.
1699  *
1700  * Returns:
1701  *  WINED3D_OK on success
1702  *  The return values of called methods on failure
1703  *
1704  *****************************************************************************/
1705 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1706 {
1707     HRESULT hr;
1708
1709     TRACE("surface %p.\n", surface);
1710
1711     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1712     {
1713         ERR("Overlays not yet supported by GDI surfaces.\n");
1714         return WINED3DERR_INVALIDCALL;
1715     }
1716
1717     /* Sysmem textures have memory already allocated - release it,
1718      * this avoids an unnecessary memcpy. */
1719     hr = surface_create_dib_section(surface);
1720     if (SUCCEEDED(hr))
1721     {
1722         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1723         surface->resource.heapMemory = NULL;
1724         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1725     }
1726
1727     /* We don't mind the nonpow2 stuff in GDI. */
1728     surface->pow2Width = surface->resource.width;
1729     surface->pow2Height = surface->resource.height;
1730
1731     return WINED3D_OK;
1732 }
1733
1734 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1735 {
1736     TRACE("surface %p.\n", surface);
1737
1738     if (surface->flags & SFLAG_DIBSECTION)
1739     {
1740         /* Release the DC. */
1741         SelectObject(surface->hDC, surface->dib.holdbitmap);
1742         DeleteDC(surface->hDC);
1743         /* Release the DIB section. */
1744         DeleteObject(surface->dib.DIBsection);
1745         surface->dib.bitmap_data = NULL;
1746         surface->resource.allocatedMemory = NULL;
1747     }
1748
1749     if (surface->flags & SFLAG_USERPTR)
1750         wined3d_surface_set_mem(surface, NULL);
1751     if (surface->overlay_dest)
1752         list_remove(&surface->overlay_entry);
1753
1754     HeapFree(GetProcessHeap(), 0, surface->palette9);
1755
1756     resource_cleanup(&surface->resource);
1757 }
1758
1759 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1760 {
1761     struct wined3d_palette *palette = surface->palette;
1762
1763     TRACE("surface %p.\n", surface);
1764
1765     if (!palette) return;
1766
1767     if (surface->flags & SFLAG_DIBSECTION)
1768     {
1769         RGBQUAD col[256];
1770         unsigned int i;
1771
1772         TRACE("Updating the DC's palette.\n");
1773
1774         for (i = 0; i < 256; ++i)
1775         {
1776             col[i].rgbRed = palette->palents[i].peRed;
1777             col[i].rgbGreen = palette->palents[i].peGreen;
1778             col[i].rgbBlue = palette->palents[i].peBlue;
1779             col[i].rgbReserved = 0;
1780         }
1781         SetDIBColorTable(surface->hDC, 0, 256, col);
1782     }
1783
1784     /* Update the image because of the palette change. Some games like e.g.
1785      * Red Alert call SetEntries a lot to implement fading. */
1786     /* Tell the swapchain to update the screen. */
1787     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1788     {
1789         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1790         if (surface == swapchain->front_buffer)
1791         {
1792             x11_copy_to_screen(swapchain, NULL);
1793         }
1794     }
1795 }
1796
1797 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
1798 {
1799     FIXME("GDI surfaces can't draw overlays yet.\n");
1800     return E_FAIL;
1801 }
1802
/* Preloading is not supported for GDI surfaces; the call is traced and
 * reported as an error, nothing else happens. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);

    ERR("Preloading GDI surfaces is not supported.\n");
}
1809
1810 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1811 {
1812     TRACE("surface %p, rect %s, flags %#x.\n",
1813             surface, wine_dbgstr_rect(rect), flags);
1814
1815     if (!surface->resource.allocatedMemory)
1816     {
1817         /* This happens on gdi surfaces if the application set a user pointer
1818          * and resets it. Recreate the DIB section. */
1819         surface_create_dib_section(surface);
1820         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1821     }
1822 }
1823
1824 static void gdi_surface_unmap(struct wined3d_surface *surface)
1825 {
1826     TRACE("surface %p.\n", surface);
1827
1828     /* Tell the swapchain to update the screen. */
1829     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1830     {
1831         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1832         if (surface == swapchain->front_buffer)
1833         {
1834             x11_copy_to_screen(swapchain, &surface->lockedRect);
1835         }
1836     }
1837
1838     memset(&surface->lockedRect, 0, sizeof(RECT));
1839 }
1840
1841 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
1842 {
1843     WINED3DLOCKED_RECT lock;
1844     HRESULT hr;
1845
1846     TRACE("surface %p.\n", surface);
1847
1848     /* Should have a DIB section already. */
1849     if (!(surface->flags & SFLAG_DIBSECTION))
1850     {
1851         WARN("DC not supported on this surface\n");
1852         return WINED3DERR_INVALIDCALL;
1853     }
1854
1855     /* Map the surface. */
1856     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1857     if (FAILED(hr))
1858         ERR("Map failed, hr %#x.\n", hr);
1859
1860     return hr;
1861 }
1862
1863 static HRESULT gdi_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1864 {
1865     TRACE("surface %p, override %p.\n", surface, override);
1866
1867     return WINED3D_OK;
1868 }
1869
1870 static HRESULT gdi_surface_set_mem(struct wined3d_surface *surface, void *mem)
1871 {
1872     TRACE("surface %p, mem %p.\n", surface, mem);
1873
1874     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
1875     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1876     {
1877         ERR("Not supported on render targets.\n");
1878         return WINED3DERR_INVALIDCALL;
1879     }
1880
1881     if (mem && mem != surface->resource.allocatedMemory)
1882     {
1883         void *release = NULL;
1884
1885         /* Do I have to copy the old surface content? */
1886         if (surface->flags & SFLAG_DIBSECTION)
1887         {
1888             SelectObject(surface->hDC, surface->dib.holdbitmap);
1889             DeleteDC(surface->hDC);
1890             /* Release the DIB section. */
1891             DeleteObject(surface->dib.DIBsection);
1892             surface->dib.bitmap_data = NULL;
1893             surface->resource.allocatedMemory = NULL;
1894             surface->hDC = NULL;
1895             surface->flags &= ~SFLAG_DIBSECTION;
1896         }
1897         else if (!(surface->flags & SFLAG_USERPTR))
1898         {
1899             release = surface->resource.allocatedMemory;
1900         }
1901         surface->resource.allocatedMemory = mem;
1902         surface->flags |= SFLAG_USERPTR | SFLAG_INSYSMEM;
1903
1904         /* Now free the old memory, if any. */
1905         HeapFree(GetProcessHeap(), 0, release);
1906     }
1907     else if (surface->flags & SFLAG_USERPTR)
1908     {
1909         /* Map() and GetDC() will re-create the dib section and allocated memory. */
1910         surface->resource.allocatedMemory = NULL;
1911         surface->flags &= ~SFLAG_USERPTR;
1912     }
1913
1914     return WINED3D_OK;
1915 }
1916
/* Surface operation table for GDI surfaces.
 *
 * The initialisers are positional, so their order has to match the member
 * order of struct wined3d_surface_ops, which is declared outside this file.
 * surface_ops lists its counterparts in the same order. */
static const struct wined3d_surface_ops gdi_surface_ops =
{
    gdi_surface_private_setup,
    surface_gdi_cleanup,
    gdi_surface_realize_palette,
    gdi_surface_draw_overlay,
    gdi_surface_preload,
    gdi_surface_map,
    gdi_surface_unmap,
    gdi_surface_getdc,
    gdi_surface_flip,
    gdi_surface_set_mem,
};
1930
1931 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
1932 {
1933     GLuint *name;
1934     DWORD flag;
1935
1936     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
1937
1938     if(srgb)
1939     {
1940         name = &surface->texture_name_srgb;
1941         flag = SFLAG_INSRGBTEX;
1942     }
1943     else
1944     {
1945         name = &surface->texture_name;
1946         flag = SFLAG_INTEXTURE;
1947     }
1948
1949     if (!*name && new_name)
1950     {
1951         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
1952          * surface has no texture name yet. See if we can get rid of this. */
1953         if (surface->flags & flag)
1954             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
1955         surface_modify_location(surface, flag, FALSE);
1956     }
1957
1958     *name = new_name;
1959     surface_force_reload(surface);
1960 }
1961
1962 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
1963 {
1964     TRACE("surface %p, target %#x.\n", surface, target);
1965
1966     if (surface->texture_target != target)
1967     {
1968         if (target == GL_TEXTURE_RECTANGLE_ARB)
1969         {
1970             surface->flags &= ~SFLAG_NORMCOORD;
1971         }
1972         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
1973         {
1974             surface->flags |= SFLAG_NORMCOORD;
1975         }
1976     }
1977     surface->texture_target = target;
1978     surface_force_reload(surface);
1979 }
1980
1981 /* Context activation is done by the caller. */
1982 void surface_bind(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
1983 {
1984     TRACE("surface %p, gl_info %p, srgb %#x.\n", surface, gl_info, srgb);
1985
1986     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
1987     {
1988         struct wined3d_texture *texture = surface->container.u.texture;
1989
1990         TRACE("Passing to container (%p).\n", texture);
1991         texture->texture_ops->texture_bind(texture, gl_info, srgb);
1992     }
1993     else
1994     {
1995         if (surface->texture_level)
1996         {
1997             ERR("Standalone surface %p is non-zero texture level %u.\n",
1998                     surface, surface->texture_level);
1999         }
2000
2001         if (srgb)
2002             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2003
2004         ENTER_GL();
2005
2006         if (!surface->texture_name)
2007         {
2008             glGenTextures(1, &surface->texture_name);
2009             checkGLcall("glGenTextures");
2010
2011             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2012
2013             glBindTexture(surface->texture_target, surface->texture_name);
2014             checkGLcall("glBindTexture");
2015             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2016             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2017             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2018             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2019             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2020             checkGLcall("glTexParameteri");
2021         }
2022         else
2023         {
2024             glBindTexture(surface->texture_target, surface->texture_name);
2025             checkGLcall("glBindTexture");
2026         }
2027
2028         LEAVE_GL();
2029     }
2030 }
2031
2032 /* This function checks if the primary render target uses the 8bit paletted format. */
2033 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
2034 {
2035     if (device->fb.render_targets && device->fb.render_targets[0])
2036     {
2037         const struct wined3d_surface *render_target = device->fb.render_targets[0];
2038         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
2039                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
2040             return TRUE;
2041     }
2042     return FALSE;
2043 }
2044
2045 /* This call just downloads data, the caller is responsible for binding the
2046  * correct texture. */
2047 /* Context activation is done by the caller. */
2048 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2049 {
2050     const struct wined3d_format *format = surface->resource.format;
2051
2052     /* Only support read back of converted P8 surfaces. */
2053     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2054     {
2055         FIXME("Readback conversion not supported for format %s.\n", debug_d3dformat(format->id));
2056         return;
2057     }
2058
2059     ENTER_GL();
2060
2061     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2062     {
2063         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2064                 surface, surface->texture_level, format->glFormat, format->glType,
2065                 surface->resource.allocatedMemory);
2066
2067         if (surface->flags & SFLAG_PBO)
2068         {
2069             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2070             checkGLcall("glBindBufferARB");
2071             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2072             checkGLcall("glGetCompressedTexImageARB");
2073             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2074             checkGLcall("glBindBufferARB");
2075         }
2076         else
2077         {
2078             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2079                     surface->texture_level, surface->resource.allocatedMemory));
2080             checkGLcall("glGetCompressedTexImageARB");
2081         }
2082
2083         LEAVE_GL();
2084     }
2085     else
2086     {
2087         void *mem;
2088         GLenum gl_format = format->glFormat;
2089         GLenum gl_type = format->glType;
2090         int src_pitch = 0;
2091         int dst_pitch = 0;
2092
2093         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2094         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2095         {
2096             gl_format = GL_ALPHA;
2097             gl_type = GL_UNSIGNED_BYTE;
2098         }
2099
2100         if (surface->flags & SFLAG_NONPOW2)
2101         {
2102             unsigned char alignment = surface->resource.device->surface_alignment;
2103             src_pitch = format->byte_count * surface->pow2Width;
2104             dst_pitch = wined3d_surface_get_pitch(surface);
2105             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2106             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2107         }
2108         else
2109         {
2110             mem = surface->resource.allocatedMemory;
2111         }
2112
2113         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2114                 surface, surface->texture_level, gl_format, gl_type, mem);
2115
2116         if (surface->flags & SFLAG_PBO)
2117         {
2118             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2119             checkGLcall("glBindBufferARB");
2120
2121             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2122             checkGLcall("glGetTexImage");
2123
2124             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2125             checkGLcall("glBindBufferARB");
2126         }
2127         else
2128         {
2129             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2130             checkGLcall("glGetTexImage");
2131         }
2132         LEAVE_GL();
2133
2134         if (surface->flags & SFLAG_NONPOW2)
2135         {
2136             const BYTE *src_data;
2137             BYTE *dst_data;
2138             UINT y;
2139             /*
2140              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2141              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2142              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2143              *
2144              * We're doing this...
2145              *
2146              * instead of boxing the texture :
2147              * |<-texture width ->|  -->pow2width|   /\
2148              * |111111111111111111|              |   |
2149              * |222 Texture 222222| boxed empty  | texture height
2150              * |3333 Data 33333333|              |   |
2151              * |444444444444444444|              |   \/
2152              * -----------------------------------   |
2153              * |     boxed  empty | boxed empty  | pow2height
2154              * |                  |              |   \/
2155              * -----------------------------------
2156              *
2157              *
2158              * we're repacking the data to the expected texture width
2159              *
2160              * |<-texture width ->|  -->pow2width|   /\
2161              * |111111111111111111222222222222222|   |
2162              * |222333333333333333333444444444444| texture height
2163              * |444444                           |   |
2164              * |                                 |   \/
2165              * |                                 |   |
2166              * |            empty                | pow2height
2167              * |                                 |   \/
2168              * -----------------------------------
2169              *
2170              * == is the same as
2171              *
2172              * |<-texture width ->|    /\
2173              * |111111111111111111|
2174              * |222222222222222222|texture height
2175              * |333333333333333333|
2176              * |444444444444444444|    \/
2177              * --------------------
2178              *
2179              * this also means that any references to allocatedMemory should work with the data as if were a
2180              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2181              *
2182              * internally the texture is still stored in a boxed format so any references to textureName will
2183              * get a boxed texture with width pow2width and not a texture of width resource.width.
2184              *
2185              * Performance should not be an issue, because applications normally do not lock the surfaces when
2186              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2187              * and doesn't have to be re-read. */
2188             src_data = mem;
2189             dst_data = surface->resource.allocatedMemory;
2190             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2191             for (y = 1; y < surface->resource.height; ++y)
2192             {
2193                 /* skip the first row */
2194                 src_data += src_pitch;
2195                 dst_data += dst_pitch;
2196                 memcpy(dst_data, src_data, dst_pitch);
2197             }
2198
2199             HeapFree(GetProcessHeap(), 0, mem);
2200         }
2201     }
2202
2203     /* Surface has now been downloaded */
2204     surface->flags |= SFLAG_INSYSMEM;
2205 }
2206
2207 /* This call just uploads data, the caller is responsible for binding the
2208  * correct texture. */
2209 /* Context activation is done by the caller. */
2210 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2211         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2212         BOOL srgb, const struct wined3d_bo_address *data)
2213 {
2214     UINT update_w = src_rect->right - src_rect->left;
2215     UINT update_h = src_rect->bottom - src_rect->top;
2216
2217     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2218             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2219             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2220
2221     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2222         update_h *= format->heightscale;
2223
2224     ENTER_GL();
2225
2226     if (data->buffer_object)
2227     {
2228         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2229         checkGLcall("glBindBufferARB");
2230     }
2231
2232     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2233     {
2234         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2235         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2236         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2237         const BYTE *addr = data->addr;
2238         GLenum internal;
2239
2240         addr += (src_rect->top / format->block_height) * src_pitch;
2241         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2242
2243         if (srgb)
2244             internal = format->glGammaInternal;
2245         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2246             internal = format->rtInternal;
2247         else
2248             internal = format->glInternal;
2249
2250         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2251                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2252                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2253
2254         if (row_length == src_pitch)
2255         {
2256             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2257                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2258         }
2259         else
2260         {
2261             UINT row, y;
2262
2263             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2264              * can't use the unpack row length like below. */
2265             for (row = 0, y = dst_point->y; row < row_count; ++row)
2266             {
2267                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2268                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2269                 y += format->block_height;
2270                 addr += src_pitch;
2271             }
2272         }
2273         checkGLcall("glCompressedTexSubImage2DARB");
2274     }
2275     else
2276     {
2277         const BYTE *addr = data->addr;
2278
2279         addr += src_rect->top * src_w * format->byte_count;
2280         addr += src_rect->left * format->byte_count;
2281
2282         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2283                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2284                 update_w, update_h, format->glFormat, format->glType, addr);
2285
2286         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2287         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2288                 update_w, update_h, format->glFormat, format->glType, addr);
2289         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2290         checkGLcall("glTexSubImage2D");
2291     }
2292
2293     if (data->buffer_object)
2294     {
2295         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2296         checkGLcall("glBindBufferARB");
2297     }
2298
2299     LEAVE_GL();
2300
2301     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2302     {
2303         struct wined3d_device *device = surface->resource.device;
2304         unsigned int i;
2305
2306         for (i = 0; i < device->context_count; ++i)
2307         {
2308             context_surface_update(device->contexts[i], surface);
2309         }
2310     }
2311 }
2312
2313 /* This call just allocates the texture, the caller is responsible for binding
2314  * the correct texture. */
2315 /* Context activation is done by the caller. */
2316 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2317         const struct wined3d_format *format, BOOL srgb)
2318 {
2319     BOOL enable_client_storage = FALSE;
2320     GLsizei width = surface->pow2Width;
2321     GLsizei height = surface->pow2Height;
2322     const BYTE *mem = NULL;
2323     GLenum internal;
2324
2325     if (srgb)
2326     {
2327         internal = format->glGammaInternal;
2328     }
2329     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2330     {
2331         internal = format->rtInternal;
2332     }
2333     else
2334     {
2335         internal = format->glInternal;
2336     }
2337
2338     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2339
2340     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2341             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2342             internal, width, height, format->glFormat, format->glType);
2343
2344     ENTER_GL();
2345
2346     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2347     {
2348         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2349                 || !surface->resource.allocatedMemory)
2350         {
2351             /* In some cases we want to disable client storage.
2352              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2353              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2354              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2355              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2356              */
2357             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2358             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2359             surface->flags &= ~SFLAG_CLIENT;
2360             enable_client_storage = TRUE;
2361         }
2362         else
2363         {
2364             surface->flags |= SFLAG_CLIENT;
2365
2366             /* Point OpenGL to our allocated texture memory. Do not use
2367              * resource.allocatedMemory here because it might point into a
2368              * PBO. Instead use heapMemory, but get the alignment right. */
2369             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2370                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2371         }
2372     }
2373
2374     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2375     {
2376         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2377                 internal, width, height, 0, surface->resource.size, mem));
2378         checkGLcall("glCompressedTexImage2DARB");
2379     }
2380     else
2381     {
2382         glTexImage2D(surface->texture_target, surface->texture_level,
2383                 internal, width, height, 0, format->glFormat, format->glType, mem);
2384         checkGLcall("glTexImage2D");
2385     }
2386
2387     if(enable_client_storage) {
2388         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2389         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2390     }
2391     LEAVE_GL();
2392 }
2393
2394 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2395  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2396 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2397 /* GL locking is done by the caller */
2398 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2399 {
2400     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2401     struct wined3d_renderbuffer_entry *entry;
2402     GLuint renderbuffer = 0;
2403     unsigned int src_width, src_height;
2404     unsigned int width, height;
2405
2406     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2407     {
2408         width = rt->pow2Width;
2409         height = rt->pow2Height;
2410     }
2411     else
2412     {
2413         width = surface->pow2Width;
2414         height = surface->pow2Height;
2415     }
2416
2417     src_width = surface->pow2Width;
2418     src_height = surface->pow2Height;
2419
2420     /* A depth stencil smaller than the render target is not valid */
2421     if (width > src_width || height > src_height) return;
2422
2423     /* Remove any renderbuffer set if the sizes match */
2424     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2425             || (width == src_width && height == src_height))
2426     {
2427         surface->current_renderbuffer = NULL;
2428         return;
2429     }
2430
2431     /* Look if we've already got a renderbuffer of the correct dimensions */
2432     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2433     {
2434         if (entry->width == width && entry->height == height)
2435         {
2436             renderbuffer = entry->id;
2437             surface->current_renderbuffer = entry;
2438             break;
2439         }
2440     }
2441
2442     if (!renderbuffer)
2443     {
2444         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2445         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2446         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2447                 surface->resource.format->glInternal, width, height);
2448
2449         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2450         entry->width = width;
2451         entry->height = height;
2452         entry->id = renderbuffer;
2453         list_add_head(&surface->renderbuffers, &entry->entry);
2454
2455         surface->current_renderbuffer = entry;
2456     }
2457
2458     checkGLcall("set_compatible_renderbuffer");
2459 }
2460
2461 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2462 {
2463     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2464
2465     TRACE("surface %p.\n", surface);
2466
2467     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2468     {
2469         ERR("Surface %p is not on a swapchain.\n", surface);
2470         return GL_NONE;
2471     }
2472
2473     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2474     {
2475         if (swapchain->render_to_fbo)
2476         {
2477             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2478             return GL_COLOR_ATTACHMENT0;
2479         }
2480         TRACE("Returning GL_BACK\n");
2481         return GL_BACK;
2482     }
2483     else if (surface == swapchain->front_buffer)
2484     {
2485         TRACE("Returning GL_FRONT\n");
2486         return GL_FRONT;
2487     }
2488
2489     FIXME("Higher back buffer, returning GL_BACK\n");
2490     return GL_BACK;
2491 }
2492
2493 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2494 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2495 {
2496     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2497
2498     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2499         /* No partial locking for textures yet. */
2500         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2501
2502     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2503     if (dirty_rect)
2504     {
2505         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2506         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2507         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2508         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2509     }
2510     else
2511     {
2512         surface->dirtyRect.left = 0;
2513         surface->dirtyRect.top = 0;
2514         surface->dirtyRect.right = surface->resource.width;
2515         surface->dirtyRect.bottom = surface->resource.height;
2516     }
2517
2518     /* if the container is a texture then mark it dirty. */
2519     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2520     {
2521         TRACE("Passing to container.\n");
2522         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2523     }
2524 }
2525
2526 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
2527         DWORD color, WINED3DCOLORVALUE *float_color)
2528 {
2529     const struct wined3d_format *format = surface->resource.format;
2530     const struct wined3d_device *device = surface->resource.device;
2531
2532     switch (format->id)
2533     {
2534         case WINED3DFMT_P8_UINT:
2535             if (surface->palette)
2536             {
2537                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
2538                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
2539                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
2540             }
2541             else
2542             {
2543                 float_color->r = 0.0f;
2544                 float_color->g = 0.0f;
2545                 float_color->b = 0.0f;
2546             }
2547             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
2548             break;
2549
2550         case WINED3DFMT_B5G6R5_UNORM:
2551             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
2552             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
2553             float_color->b = (color & 0x1f) / 31.0f;
2554             float_color->a = 1.0f;
2555             break;
2556
2557         case WINED3DFMT_B8G8R8_UNORM:
2558         case WINED3DFMT_B8G8R8X8_UNORM:
2559             float_color->r = D3DCOLOR_R(color);
2560             float_color->g = D3DCOLOR_G(color);
2561             float_color->b = D3DCOLOR_B(color);
2562             float_color->a = 1.0f;
2563             break;
2564
2565         case WINED3DFMT_B8G8R8A8_UNORM:
2566             float_color->r = D3DCOLOR_R(color);
2567             float_color->g = D3DCOLOR_G(color);
2568             float_color->b = D3DCOLOR_B(color);
2569             float_color->a = D3DCOLOR_A(color);
2570             break;
2571
2572         default:
2573             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
2574             return FALSE;
2575     }
2576
2577     return TRUE;
2578 }
2579
2580 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2581 {
2582     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2583     BOOL ck_changed;
2584
2585     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2586
2587     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2588     {
2589         ERR("Not supported on scratch surfaces.\n");
2590         return WINED3DERR_INVALIDCALL;
2591     }
2592
2593     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2594
2595     /* Reload if either the texture and sysmem have different ideas about the
2596      * color key, or the actual key values changed. */
2597     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2598             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2599             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2600     {
2601         TRACE("Reloading because of color keying\n");
2602         /* To perform the color key conversion we need a sysmem copy of
2603          * the surface. Make sure we have it. */
2604
2605         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2606         /* Make sure the texture is reloaded because of the color key change,
2607          * this kills performance though :( */
2608         /* TODO: This is not necessarily needed with hw palettized texture support. */
2609         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2610         /* Switching color keying on / off may change the internal format. */
2611         if (ck_changed)
2612             surface_force_reload(surface);
2613     }
2614     else if (!(surface->flags & flag))
2615     {
2616         TRACE("Reloading because surface is dirty.\n");
2617     }
2618     else
2619     {
2620         TRACE("surface is already in texture\n");
2621         return WINED3D_OK;
2622     }
2623
2624     /* No partial locking for textures yet. */
2625     surface_load_location(surface, flag, NULL);
2626     surface_evict_sysmem(surface);
2627
2628     return WINED3D_OK;
2629 }
2630
/* Converts a 32-bit float to a 16-bit half float bit pattern (1 sign bit,
 * 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * Zero of either sign yields 0x0000, NaN yields 0x7c01, and infinities or
 * values too large for a half float yield +/-INF. Values are rounded to
 * nearest (ties away from zero); values too small for a denormal become zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The magnitude is scaled into [2^10, 2^11), so that its integer part is
     * the 11-bit significand: the implicit leading one plus 10 stored bits. */
    const float significand_min = 1024.0f; /* 2^10 */
    const float significand_max = 2048.0f; /* 2^11 */
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    while (tmp < significand_min)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= significand_max)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11-bit significand (2047.5 -> 2048).
     * Renormalize, otherwise the masked mantissa becomes 0 without the
     * exponent being bumped and the result is off by a factor of two. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2693
2694 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2695 {
2696     ULONG refcount;
2697
2698     TRACE("Surface %p, container %p of type %#x.\n",
2699             surface, surface->container.u.base, surface->container.type);
2700
2701     switch (surface->container.type)
2702     {
2703         case WINED3D_CONTAINER_TEXTURE:
2704             return wined3d_texture_incref(surface->container.u.texture);
2705
2706         case WINED3D_CONTAINER_SWAPCHAIN:
2707             return wined3d_swapchain_incref(surface->container.u.swapchain);
2708
2709         default:
2710             ERR("Unhandled container type %#x.\n", surface->container.type);
2711         case WINED3D_CONTAINER_NONE:
2712             break;
2713     }
2714
2715     refcount = InterlockedIncrement(&surface->resource.ref);
2716     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2717
2718     return refcount;
2719 }
2720
2721 /* Do not call while under the GL lock. */
2722 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2723 {
2724     ULONG refcount;
2725
2726     TRACE("Surface %p, container %p of type %#x.\n",
2727             surface, surface->container.u.base, surface->container.type);
2728
2729     switch (surface->container.type)
2730     {
2731         case WINED3D_CONTAINER_TEXTURE:
2732             return wined3d_texture_decref(surface->container.u.texture);
2733
2734         case WINED3D_CONTAINER_SWAPCHAIN:
2735             return wined3d_swapchain_decref(surface->container.u.swapchain);
2736
2737         default:
2738             ERR("Unhandled container type %#x.\n", surface->container.type);
2739         case WINED3D_CONTAINER_NONE:
2740             break;
2741     }
2742
2743     refcount = InterlockedDecrement(&surface->resource.ref);
2744     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2745
2746     if (!refcount)
2747     {
2748         surface->surface_ops->surface_cleanup(surface);
2749         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2750
2751         TRACE("Destroyed surface %p.\n", surface);
2752         HeapFree(GetProcessHeap(), 0, surface);
2753     }
2754
2755     return refcount;
2756 }
2757
2758 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2759 {
2760     return resource_set_priority(&surface->resource, priority);
2761 }
2762
2763 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2764 {
2765     return resource_get_priority(&surface->resource);
2766 }
2767
2768 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2769 {
2770     TRACE("surface %p.\n", surface);
2771
2772     surface->surface_ops->surface_preload(surface);
2773 }
2774
2775 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2776 {
2777     TRACE("surface %p.\n", surface);
2778
2779     return surface->resource.parent;
2780 }
2781
2782 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2783 {
2784     TRACE("surface %p.\n", surface);
2785
2786     return &surface->resource;
2787 }
2788
2789 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2790 {
2791     TRACE("surface %p, flags %#x.\n", surface, flags);
2792
2793     switch (flags)
2794     {
2795         case WINEDDGBS_CANBLT:
2796         case WINEDDGBS_ISBLTDONE:
2797             return WINED3D_OK;
2798
2799         default:
2800             return WINED3DERR_INVALIDCALL;
2801     }
2802 }
2803
2804 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2805 {
2806     TRACE("surface %p, flags %#x.\n", surface, flags);
2807
2808     /* XXX: DDERR_INVALIDSURFACETYPE */
2809
2810     switch (flags)
2811     {
2812         case WINEDDGFS_CANFLIP:
2813         case WINEDDGFS_ISFLIPDONE:
2814             return WINED3D_OK;
2815
2816         default:
2817             return WINED3DERR_INVALIDCALL;
2818     }
2819 }
2820
2821 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2822 {
2823     TRACE("surface %p.\n", surface);
2824
2825     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2826     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2827 }
2828
2829 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2830 {
2831     TRACE("surface %p.\n", surface);
2832
2833     /* So far we don't lose anything :) */
2834     surface->flags &= ~SFLAG_LOST;
2835     return WINED3D_OK;
2836 }
2837
2838 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2839 {
2840     TRACE("surface %p, palette %p.\n", surface, palette);
2841
2842     if (surface->palette == palette)
2843     {
2844         TRACE("Nop palette change.\n");
2845         return WINED3D_OK;
2846     }
2847
2848     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2849         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2850
2851     surface->palette = palette;
2852
2853     if (palette)
2854     {
2855         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2856             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2857
2858         surface->surface_ops->surface_realize_palette(surface);
2859     }
2860
2861     return WINED3D_OK;
2862 }
2863
2864 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2865         DWORD flags, const WINEDDCOLORKEY *color_key)
2866 {
2867     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
2868
2869     if (flags & WINEDDCKEY_COLORSPACE)
2870     {
2871         FIXME(" colorkey value not supported (%08x) !\n", flags);
2872         return WINED3DERR_INVALIDCALL;
2873     }
2874
2875     /* Dirtify the surface, but only if a key was changed. */
2876     if (color_key)
2877     {
2878         switch (flags & ~WINEDDCKEY_COLORSPACE)
2879         {
2880             case WINEDDCKEY_DESTBLT:
2881                 surface->DestBltCKey = *color_key;
2882                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
2883                 break;
2884
2885             case WINEDDCKEY_DESTOVERLAY:
2886                 surface->DestOverlayCKey = *color_key;
2887                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
2888                 break;
2889
2890             case WINEDDCKEY_SRCOVERLAY:
2891                 surface->SrcOverlayCKey = *color_key;
2892                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
2893                 break;
2894
2895             case WINEDDCKEY_SRCBLT:
2896                 surface->SrcBltCKey = *color_key;
2897                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
2898                 break;
2899         }
2900     }
2901     else
2902     {
2903         switch (flags & ~WINEDDCKEY_COLORSPACE)
2904         {
2905             case WINEDDCKEY_DESTBLT:
2906                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
2907                 break;
2908
2909             case WINEDDCKEY_DESTOVERLAY:
2910                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
2911                 break;
2912
2913             case WINEDDCKEY_SRCOVERLAY:
2914                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
2915                 break;
2916
2917             case WINEDDCKEY_SRCBLT:
2918                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
2919                 break;
2920         }
2921     }
2922
2923     return WINED3D_OK;
2924 }
2925
2926 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
2927 {
2928     TRACE("surface %p.\n", surface);
2929
2930     return surface->palette;
2931 }
2932
2933 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
2934 {
2935     const struct wined3d_format *format = surface->resource.format;
2936     DWORD pitch;
2937
2938     TRACE("surface %p.\n", surface);
2939
2940     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
2941     {
2942         /* Since compressed formats are block based, pitch means the amount of
2943          * bytes to the next row of block rather than the next row of pixels. */
2944         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
2945         pitch = row_block_count * format->block_byte_count;
2946     }
2947     else
2948     {
2949         unsigned char alignment = surface->resource.device->surface_alignment;
2950         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
2951         pitch = (pitch + alignment - 1) & ~(alignment - 1);
2952     }
2953
2954     TRACE("Returning %u.\n", pitch);
2955
2956     return pitch;
2957 }
2958
2959 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
2960 {
2961     TRACE("surface %p, mem %p.\n", surface, mem);
2962
2963     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
2964     {
2965         WARN("Surface is locked or the DC is in use.\n");
2966         return WINED3DERR_INVALIDCALL;
2967     }
2968
2969     return surface->surface_ops->surface_set_mem(surface, mem);
2970 }
2971
2972 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
2973 {
2974     LONG w, h;
2975
2976     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
2977
2978     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2979     {
2980         WARN("Not an overlay surface.\n");
2981         return WINEDDERR_NOTAOVERLAYSURFACE;
2982     }
2983
2984     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
2985     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
2986     surface->overlay_destrect.left = x;
2987     surface->overlay_destrect.top = y;
2988     surface->overlay_destrect.right = x + w;
2989     surface->overlay_destrect.bottom = y + h;
2990
2991     surface->surface_ops->surface_draw_overlay(surface);
2992
2993     return WINED3D_OK;
2994 }
2995
2996 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
2997 {
2998     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
2999
3000     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3001     {
3002         TRACE("Not an overlay surface.\n");
3003         return WINEDDERR_NOTAOVERLAYSURFACE;
3004     }
3005
3006     if (!surface->overlay_dest)
3007     {
3008         TRACE("Overlay not visible.\n");
3009         *x = 0;
3010         *y = 0;
3011         return WINEDDERR_OVERLAYNOTVISIBLE;
3012     }
3013
3014     *x = surface->overlay_destrect.left;
3015     *y = surface->overlay_destrect.top;
3016
3017     TRACE("Returning position %d, %d.\n", *x, *y);
3018
3019     return WINED3D_OK;
3020 }
3021
3022 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3023         DWORD flags, struct wined3d_surface *ref)
3024 {
3025     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3026
3027     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3028     {
3029         TRACE("Not an overlay surface.\n");
3030         return WINEDDERR_NOTAOVERLAYSURFACE;
3031     }
3032
3033     return WINED3D_OK;
3034 }
3035
3036 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3037         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3038 {
3039     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3040             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3041
3042     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3043     {
3044         WARN("Not an overlay surface.\n");
3045         return WINEDDERR_NOTAOVERLAYSURFACE;
3046     }
3047     else if (!dst_surface)
3048     {
3049         WARN("Dest surface is NULL.\n");
3050         return WINED3DERR_INVALIDCALL;
3051     }
3052
3053     if (src_rect)
3054     {
3055         surface->overlay_srcrect = *src_rect;
3056     }
3057     else
3058     {
3059         surface->overlay_srcrect.left = 0;
3060         surface->overlay_srcrect.top = 0;
3061         surface->overlay_srcrect.right = surface->resource.width;
3062         surface->overlay_srcrect.bottom = surface->resource.height;
3063     }
3064
3065     if (dst_rect)
3066     {
3067         surface->overlay_destrect = *dst_rect;
3068     }
3069     else
3070     {
3071         surface->overlay_destrect.left = 0;
3072         surface->overlay_destrect.top = 0;
3073         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3074         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3075     }
3076
3077     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3078     {
3079         list_remove(&surface->overlay_entry);
3080     }
3081
3082     if (flags & WINEDDOVER_SHOW)
3083     {
3084         if (surface->overlay_dest != dst_surface)
3085         {
3086             surface->overlay_dest = dst_surface;
3087             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3088         }
3089     }
3090     else if (flags & WINEDDOVER_HIDE)
3091     {
3092         /* tests show that the rectangles are erased on hide */
3093         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3094         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3095         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3096         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3097         surface->overlay_dest = NULL;
3098     }
3099
3100     surface->surface_ops->surface_draw_overlay(surface);
3101
3102     return WINED3D_OK;
3103 }
3104
3105 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3106 {
3107     TRACE("surface %p, clipper %p.\n", surface, clipper);
3108
3109     surface->clipper = clipper;
3110
3111     return WINED3D_OK;
3112 }
3113
3114 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3115 {
3116     TRACE("surface %p.\n", surface);
3117
3118     return surface->clipper;
3119 }
3120
3121 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3122 {
3123     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3124
3125     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3126
3127     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3128     {
3129         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3130         return WINED3DERR_INVALIDCALL;
3131     }
3132
3133     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3134             surface->pow2Width, surface->pow2Height);
3135     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3136     surface->resource.format = format;
3137
3138     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3139     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3140             format->glFormat, format->glInternal, format->glType);
3141
3142     return WINED3D_OK;
3143 }
3144
3145 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3146         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3147 {
3148     unsigned short *dst_s;
3149     const float *src_f;
3150     unsigned int x, y;
3151
3152     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3153
3154     for (y = 0; y < h; ++y)
3155     {
3156         src_f = (const float *)(src + y * pitch_in);
3157         dst_s = (unsigned short *) (dst + y * pitch_out);
3158         for (x = 0; x < w; ++x)
3159         {
3160             dst_s[x] = float_32_to_16(src_f + x);
3161         }
3162     }
3163 }
3164
3165 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3166         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3167 {
3168     static const unsigned char convert_5to8[] =
3169     {
3170         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3171         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3172         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3173         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3174     };
3175     static const unsigned char convert_6to8[] =
3176     {
3177         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3178         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3179         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3180         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3181         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3182         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3183         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3184         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3185     };
3186     unsigned int x, y;
3187
3188     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3189
3190     for (y = 0; y < h; ++y)
3191     {
3192         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3193         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3194         for (x = 0; x < w; ++x)
3195         {
3196             WORD pixel = src_line[x];
3197             dst_line[x] = 0xff000000
3198                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3199                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3200                     | convert_5to8[(pixel & 0x001f)];
3201         }
3202     }
3203 }
3204
3205 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3206  * in both cases we're just setting the X / Alpha channel to 0xff. */
3207 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3208         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3209 {
3210     unsigned int x, y;
3211
3212     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3213
3214     for (y = 0; y < h; ++y)
3215     {
3216         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3217         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3218
3219         for (x = 0; x < w; ++x)
3220         {
3221             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3222         }
3223     }
3224 }
3225
3226 static inline BYTE cliptobyte(int x)
3227 {
3228     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3229 }
3230
3231 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3232         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3233 {
3234     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3235     unsigned int x, y;
3236
3237     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3238
3239     for (y = 0; y < h; ++y)
3240     {
3241         const BYTE *src_line = src + y * pitch_in;
3242         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3243         for (x = 0; x < w; ++x)
3244         {
3245             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3246              *     C = Y - 16; D = U - 128; E = V - 128;
3247              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3248              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3249              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3250              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3251              * U and V are shared between the pixels. */
3252             if (!(x & 1)) /* For every even pixel, read new U and V. */
3253             {
3254                 d = (int) src_line[1] - 128;
3255                 e = (int) src_line[3] - 128;
3256                 r2 = 409 * e + 128;
3257                 g2 = - 100 * d - 208 * e + 128;
3258                 b2 = 516 * d + 128;
3259             }
3260             c2 = 298 * ((int) src_line[0] - 16);
3261             dst_line[x] = 0xff000000
3262                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3263                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3264                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3265                 /* Scale RGB values to 0..255 range,
3266                  * then clip them if still not in range (may be negative),
3267                  * then shift them within DWORD if necessary. */
3268             src_line += 2;
3269         }
3270     }
3271 }
3272
3273 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3274         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3275 {
3276     unsigned int x, y;
3277     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3278
3279     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3280
3281     for (y = 0; y < h; ++y)
3282     {
3283         const BYTE *src_line = src + y * pitch_in;
3284         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3285         for (x = 0; x < w; ++x)
3286         {
3287             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3288              *     C = Y - 16; D = U - 128; E = V - 128;
3289              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3290              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3291              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3292              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3293              * U and V are shared between the pixels. */
3294             if (!(x & 1)) /* For every even pixel, read new U and V. */
3295             {
3296                 d = (int) src_line[1] - 128;
3297                 e = (int) src_line[3] - 128;
3298                 r2 = 409 * e + 128;
3299                 g2 = - 100 * d - 208 * e + 128;
3300                 b2 = 516 * d + 128;
3301             }
3302             c2 = 298 * ((int) src_line[0] - 16);
3303             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3304                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3305                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3306                 /* Scale RGB values to 0..255 range,
3307                  * then clip them if still not in range (may be negative),
3308                  * then shift them within DWORD if necessary. */
3309             src_line += 2;
3310         }
3311     }
3312 }
3313
3314 struct d3dfmt_convertor_desc
3315 {
3316     enum wined3d_format_id from, to;
3317     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3318 };
3319
3320 static const struct d3dfmt_convertor_desc convertors[] =
3321 {
3322     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3323     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3324     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3325     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3326     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3327     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3328 };
3329
3330 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3331         enum wined3d_format_id to)
3332 {
3333     unsigned int i;
3334
3335     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3336     {
3337         if (convertors[i].from == from && convertors[i].to == to)
3338             return &convertors[i];
3339     }
3340
3341     return NULL;
3342 }
3343
3344 /*****************************************************************************
3345  * surface_convert_format
3346  *
3347  * Creates a duplicate of a surface in a different format. Is used by Blt to
3348  * blit between surfaces with different formats.
3349  *
3350  * Parameters
3351  *  source: Source surface
3352  *  fmt: Requested destination format
3353  *
3354  *****************************************************************************/
3355 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3356 {
3357     const struct d3dfmt_convertor_desc *conv;
3358     WINED3DLOCKED_RECT lock_src, lock_dst;
3359     struct wined3d_surface *ret = NULL;
3360     HRESULT hr;
3361
3362     conv = find_convertor(source->resource.format->id, to_fmt);
3363     if (!conv)
3364     {
3365         FIXME("Cannot find a conversion function from format %s to %s.\n",
3366                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3367         return NULL;
3368     }
3369
3370     wined3d_surface_create(source->resource.device, source->resource.width,
3371             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3372             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3373             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3374     if (!ret)
3375     {
3376         ERR("Failed to create a destination surface for conversion.\n");
3377         return NULL;
3378     }
3379
3380     memset(&lock_src, 0, sizeof(lock_src));
3381     memset(&lock_dst, 0, sizeof(lock_dst));
3382
3383     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3384     if (FAILED(hr))
3385     {
3386         ERR("Failed to lock the source surface.\n");
3387         wined3d_surface_decref(ret);
3388         return NULL;
3389     }
3390     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3391     if (FAILED(hr))
3392     {
3393         ERR("Failed to lock the destination surface.\n");
3394         wined3d_surface_unmap(source);
3395         wined3d_surface_decref(ret);
3396         return NULL;
3397     }
3398
3399     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3400             source->resource.width, source->resource.height);
3401
3402     wined3d_surface_unmap(ret);
3403     wined3d_surface_unmap(source);
3404
3405     return ret;
3406 }
3407
3408 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3409         unsigned int bpp, UINT pitch, DWORD color)
3410 {
3411     BYTE *first;
3412     int x, y;
3413
3414     /* Do first row */
3415
3416 #define COLORFILL_ROW(type) \
3417 do { \
3418     type *d = (type *)buf; \
3419     for (x = 0; x < width; ++x) \
3420         d[x] = (type)color; \
3421 } while(0)
3422
3423     switch (bpp)
3424     {
3425         case 1:
3426             COLORFILL_ROW(BYTE);
3427             break;
3428
3429         case 2:
3430             COLORFILL_ROW(WORD);
3431             break;
3432
3433         case 3:
3434         {
3435             BYTE *d = buf;
3436             for (x = 0; x < width; ++x, d += 3)
3437             {
3438                 d[0] = (color      ) & 0xFF;
3439                 d[1] = (color >>  8) & 0xFF;
3440                 d[2] = (color >> 16) & 0xFF;
3441             }
3442             break;
3443         }
3444         case 4:
3445             COLORFILL_ROW(DWORD);
3446             break;
3447
3448         default:
3449             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3450             return WINED3DERR_NOTAVAILABLE;
3451     }
3452
3453 #undef COLORFILL_ROW
3454
3455     /* Now copy first row. */
3456     first = buf;
3457     for (y = 1; y < height; ++y)
3458     {
3459         buf += pitch;
3460         memcpy(buf, first, width * bpp);
3461     }
3462
3463     return WINED3D_OK;
3464 }
3465
3466 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3467 {
3468     TRACE("surface %p.\n", surface);
3469
3470     if (!(surface->flags & SFLAG_LOCKED))
3471     {
3472         WARN("Trying to unmap unmapped surface.\n");
3473         return WINEDDERR_NOTLOCKED;
3474     }
3475     surface->flags &= ~SFLAG_LOCKED;
3476
3477     surface->surface_ops->surface_unmap(surface);
3478
3479     return WINED3D_OK;
3480 }
3481
3482 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3483         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3484 {
3485     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3486             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3487
3488     if (surface->flags & SFLAG_LOCKED)
3489     {
3490         WARN("Surface is already mapped.\n");
3491         return WINED3DERR_INVALIDCALL;
3492     }
3493     surface->flags |= SFLAG_LOCKED;
3494
3495     if (!(surface->flags & SFLAG_LOCKABLE))
3496         WARN("Trying to lock unlockable surface.\n");
3497
3498     surface->surface_ops->surface_map(surface, rect, flags);
3499
3500     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3501
3502     if (!rect)
3503     {
3504         locked_rect->pBits = surface->resource.allocatedMemory;
3505         surface->lockedRect.left = 0;
3506         surface->lockedRect.top = 0;
3507         surface->lockedRect.right = surface->resource.width;
3508         surface->lockedRect.bottom = surface->resource.height;
3509     }
3510     else
3511     {
3512         const struct wined3d_format *format = surface->resource.format;
3513
3514         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3515         {
3516             /* Compressed textures are block based, so calculate the offset of
3517              * the block that contains the top-left pixel of the locked rectangle. */
3518             locked_rect->pBits = surface->resource.allocatedMemory
3519                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3520                     + ((rect->left / format->block_width) * format->block_byte_count);
3521         }
3522         else
3523         {
3524             locked_rect->pBits = surface->resource.allocatedMemory
3525                     + (locked_rect->Pitch * rect->top)
3526                     + (rect->left * format->byte_count);
3527         }
3528         surface->lockedRect.left = rect->left;
3529         surface->lockedRect.top = rect->top;
3530         surface->lockedRect.right = rect->right;
3531         surface->lockedRect.bottom = rect->bottom;
3532     }
3533
3534     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3535     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3536
3537     return WINED3D_OK;
3538 }
3539
3540 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3541 {
3542     HRESULT hr;
3543
3544     TRACE("surface %p, dc %p.\n", surface, dc);
3545
3546     if (surface->flags & SFLAG_USERPTR)
3547     {
3548         ERR("Not supported on surfaces with application-provided memory.\n");
3549         return WINEDDERR_NODC;
3550     }
3551
3552     /* Give more detailed info for ddraw. */
3553     if (surface->flags & SFLAG_DCINUSE)
3554         return WINEDDERR_DCALREADYCREATED;
3555
3556     /* Can't GetDC if the surface is locked. */
3557     if (surface->flags & SFLAG_LOCKED)
3558         return WINED3DERR_INVALIDCALL;
3559
3560     hr = surface->surface_ops->surface_getdc(surface);
3561     if (FAILED(hr))
3562         return hr;
3563
3564     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3565             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3566     {
3567         /* GetDC on palettized formats is unsupported in D3D9, and the method
3568          * is missing in D3D8, so this should only be used for DX <=7
3569          * surfaces (with non-device palettes). */
3570         const PALETTEENTRY *pal = NULL;
3571
3572         if (surface->palette)
3573         {
3574             pal = surface->palette->palents;
3575         }
3576         else
3577         {
3578             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3579             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3580
3581             if (dds_primary && dds_primary->palette)
3582                 pal = dds_primary->palette->palents;
3583         }
3584
3585         if (pal)
3586         {
3587             RGBQUAD col[256];
3588             unsigned int i;
3589
3590             for (i = 0; i < 256; ++i)
3591             {
3592                 col[i].rgbRed = pal[i].peRed;
3593                 col[i].rgbGreen = pal[i].peGreen;
3594                 col[i].rgbBlue = pal[i].peBlue;
3595                 col[i].rgbReserved = 0;
3596             }
3597             SetDIBColorTable(surface->hDC, 0, 256, col);
3598         }
3599     }
3600
3601     surface->flags |= SFLAG_DCINUSE;
3602
3603     *dc = surface->hDC;
3604     TRACE("Returning dc %p.\n", *dc);
3605
3606     return WINED3D_OK;
3607 }
3608
3609 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3610 {
3611     TRACE("surface %p, dc %p.\n", surface, dc);
3612
3613     if (!(surface->flags & SFLAG_DCINUSE))
3614         return WINEDDERR_NODC;
3615
3616     if (surface->hDC != dc)
3617     {
3618         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3619                 dc, surface->hDC);
3620         return WINEDDERR_NODC;
3621     }
3622
3623     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3624     {
3625         /* Copy the contents of the DIB over to the PBO. */
3626         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->dib.bitmap_size);
3627     }
3628
3629     /* We locked first, so unlock now. */
3630     wined3d_surface_unmap(surface);
3631
3632     surface->flags &= ~SFLAG_DCINUSE;
3633
3634     return WINED3D_OK;
3635 }
3636
3637 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3638 {
3639     struct wined3d_swapchain *swapchain;
3640     HRESULT hr;
3641
3642     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3643
3644     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
3645     {
3646         ERR("Flipped surface is not on a swapchain.\n");
3647         return WINEDDERR_NOTFLIPPABLE;
3648     }
3649     swapchain = surface->container.u.swapchain;
3650
3651     hr = surface->surface_ops->surface_flip(surface, override);
3652     if (FAILED(hr))
3653         return hr;
3654
3655     /* Just overwrite the swapchain presentation interval. This is ok because
3656      * only ddraw apps can call Flip, and only d3d8 and d3d9 applications
3657      * specify the presentation interval. */
3658     if (!(flags & (WINEDDFLIP_NOVSYNC | WINEDDFLIP_INTERVAL2 | WINEDDFLIP_INTERVAL3 | WINEDDFLIP_INTERVAL4)))
3659         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_ONE;
3660     else if (flags & WINEDDFLIP_NOVSYNC)
3661         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
3662     else if (flags & WINEDDFLIP_INTERVAL2)
3663         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_TWO;
3664     else if (flags & WINEDDFLIP_INTERVAL3)
3665         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_THREE;
3666     else
3667         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_FOUR;
3668
3669     return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
3670 }
3671
3672 /* Do not call while under the GL lock. */
3673 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3674 {
3675     struct wined3d_device *device = surface->resource.device;
3676
3677     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3678
3679     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3680     {
3681         struct wined3d_texture *texture = surface->container.u.texture;
3682
3683         TRACE("Passing to container (%p).\n", texture);
3684         texture->texture_ops->texture_preload(texture, srgb);
3685     }
3686     else
3687     {
3688         struct wined3d_context *context = NULL;
3689
3690         TRACE("(%p) : About to load surface\n", surface);
3691
3692         if (!device->isInDraw) context = context_acquire(device, NULL);
3693
3694         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3695                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3696         {
3697             if (palette9_changed(surface))
3698             {
3699                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3700                 /* TODO: This is not necessarily needed with hw palettized texture support */
3701                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3702                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3703                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3704             }
3705         }
3706
3707         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3708
3709         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3710         {
3711             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3712             GLclampf tmp;
3713             tmp = 0.9f;
3714             ENTER_GL();
3715             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3716             LEAVE_GL();
3717         }
3718
3719         if (context) context_release(context);
3720     }
3721 }
3722
3723 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3724 {
3725     if (!surface->resource.allocatedMemory)
3726     {
3727         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3728                 surface->resource.size + RESOURCE_ALIGNMENT);
3729         if (!surface->resource.heapMemory)
3730         {
3731             ERR("Out of memory\n");
3732             return FALSE;
3733         }
3734         surface->resource.allocatedMemory =
3735             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3736     }
3737     else
3738     {
3739         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3740     }
3741
3742     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3743
3744     return TRUE;
3745 }
3746
3747 /* Read the framebuffer back into the surface */
3748 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3749 {
3750     struct wined3d_device *device = surface->resource.device;
3751     const struct wined3d_gl_info *gl_info;
3752     struct wined3d_context *context;
3753     BYTE *mem;
3754     GLint fmt;
3755     GLint type;
3756     BYTE *row, *top, *bottom;
3757     int i;
3758     BOOL bpp;
3759     RECT local_rect;
3760     BOOL srcIsUpsideDown;
3761     GLint rowLen = 0;
3762     GLint skipPix = 0;
3763     GLint skipRow = 0;
3764
3765     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3766         static BOOL warned = FALSE;
3767         if(!warned) {
3768             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3769             warned = TRUE;
3770         }
3771         return;
3772     }
3773
3774     context = context_acquire(device, surface);
3775     context_apply_blit_state(context, device);
3776     gl_info = context->gl_info;
3777
3778     ENTER_GL();
3779
3780     /* Select the correct read buffer, and give some debug output.
3781      * There is no need to keep track of the current read buffer or reset it, every part of the code
3782      * that reads sets the read buffer as desired.
3783      */
3784     if (surface_is_offscreen(surface))
3785     {
3786         /* Mapping the primary render target which is not on a swapchain.
3787          * Read from the back buffer. */
3788         TRACE("Mapping offscreen render target.\n");
3789         glReadBuffer(device->offscreenBuffer);
3790         srcIsUpsideDown = TRUE;
3791     }
3792     else
3793     {
3794         /* Onscreen surfaces are always part of a swapchain */
3795         GLenum buffer = surface_get_gl_buffer(surface);
3796         TRACE("Mapping %#x buffer.\n", buffer);
3797         glReadBuffer(buffer);
3798         checkGLcall("glReadBuffer");
3799         srcIsUpsideDown = FALSE;
3800     }
3801
3802     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3803     if (!rect)
3804     {
3805         local_rect.left = 0;
3806         local_rect.top = 0;
3807         local_rect.right = surface->resource.width;
3808         local_rect.bottom = surface->resource.height;
3809     }
3810     else
3811     {
3812         local_rect = *rect;
3813     }
3814     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3815
3816     switch (surface->resource.format->id)
3817     {
3818         case WINED3DFMT_P8_UINT:
3819         {
3820             if (primary_render_target_is_p8(device))
3821             {
3822                 /* In case of P8 render targets the index is stored in the alpha component */
3823                 fmt = GL_ALPHA;
3824                 type = GL_UNSIGNED_BYTE;
3825                 mem = dest;
3826                 bpp = surface->resource.format->byte_count;
3827             }
3828             else
3829             {
3830                 /* GL can't return palettized data, so read ARGB pixels into a
3831                  * separate block of memory and convert them into palettized format
3832                  * in software. Slow, but if the app means to use palettized render
3833                  * targets and locks it...
3834                  *
3835                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
3836                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
3837                  * for the color channels when palettizing the colors.
3838                  */
3839                 fmt = GL_RGB;
3840                 type = GL_UNSIGNED_BYTE;
3841                 pitch *= 3;
3842                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
3843                 if (!mem)
3844                 {
3845                     ERR("Out of memory\n");
3846                     LEAVE_GL();
3847                     return;
3848                 }
3849                 bpp = surface->resource.format->byte_count * 3;
3850             }
3851         }
3852         break;
3853
3854         default:
3855             mem = dest;
3856             fmt = surface->resource.format->glFormat;
3857             type = surface->resource.format->glType;
3858             bpp = surface->resource.format->byte_count;
3859     }
3860
3861     if (surface->flags & SFLAG_PBO)
3862     {
3863         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
3864         checkGLcall("glBindBufferARB");
3865         if (mem)
3866         {
3867             ERR("mem not null for pbo -- unexpected\n");
3868             mem = NULL;
3869         }
3870     }
3871
3872     /* Save old pixel store pack state */
3873     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
3874     checkGLcall("glGetIntegerv");
3875     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
3876     checkGLcall("glGetIntegerv");
3877     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
3878     checkGLcall("glGetIntegerv");
3879
3880     /* Setup pixel store pack state -- to glReadPixels into the correct place */
3881     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
3882     checkGLcall("glPixelStorei");
3883     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
3884     checkGLcall("glPixelStorei");
3885     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
3886     checkGLcall("glPixelStorei");
3887
3888     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
3889             local_rect.right - local_rect.left,
3890             local_rect.bottom - local_rect.top,
3891             fmt, type, mem);
3892     checkGLcall("glReadPixels");
3893
3894     /* Reset previous pixel store pack state */
3895     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
3896     checkGLcall("glPixelStorei");
3897     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
3898     checkGLcall("glPixelStorei");
3899     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
3900     checkGLcall("glPixelStorei");
3901
3902     if (surface->flags & SFLAG_PBO)
3903     {
3904         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
3905         checkGLcall("glBindBufferARB");
3906
3907         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
3908          * to get a pointer to it and perform the flipping in software. This is a lot
3909          * faster than calling glReadPixels for each line. In case we want more speed
3910          * we should rerender it flipped in a FBO and read the data back from the FBO. */
3911         if (!srcIsUpsideDown)
3912         {
3913             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
3914             checkGLcall("glBindBufferARB");
3915
3916             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
3917             checkGLcall("glMapBufferARB");
3918         }
3919     }
3920
3921     /* TODO: Merge this with the palettization loop below for P8 targets */
3922     if(!srcIsUpsideDown) {
3923         UINT len, off;
3924         /* glReadPixels returns the image upside down, and there is no way to prevent this.
3925             Flip the lines in software */
3926         len = (local_rect.right - local_rect.left) * bpp;
3927         off = local_rect.left * bpp;
3928
3929         row = HeapAlloc(GetProcessHeap(), 0, len);
3930         if(!row) {
3931             ERR("Out of memory\n");
3932             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
3933                 HeapFree(GetProcessHeap(), 0, mem);
3934             LEAVE_GL();
3935             return;
3936         }
3937
3938         top = mem + pitch * local_rect.top;
3939         bottom = mem + pitch * (local_rect.bottom - 1);
3940         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
3941             memcpy(row, top + off, len);
3942             memcpy(top + off, bottom + off, len);
3943             memcpy(bottom + off, row, len);
3944             top += pitch;
3945             bottom -= pitch;
3946         }
3947         HeapFree(GetProcessHeap(), 0, row);
3948
3949         /* Unmap the temp PBO buffer */
3950         if (surface->flags & SFLAG_PBO)
3951         {
3952             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
3953             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
3954         }
3955     }
3956
3957     LEAVE_GL();
3958     context_release(context);
3959
3960     /* For P8 textures we need to perform an inverse palette lookup. This is
3961      * done by searching for a palette index which matches the RGB value.
3962      * Note this isn't guaranteed to work when there are multiple entries for
3963      * the same color but we have no choice. In case of P8 render targets,
3964      * the index is stored in the alpha component so no conversion is needed. */
3965     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
3966     {
3967         const PALETTEENTRY *pal = NULL;
3968         DWORD width = pitch / 3;
3969         int x, y, c;
3970
3971         if (surface->palette)
3972         {
3973             pal = surface->palette->palents;
3974         }
3975         else
3976         {
3977             ERR("Palette is missing, cannot perform inverse palette lookup\n");
3978             HeapFree(GetProcessHeap(), 0, mem);
3979             return;
3980         }
3981
3982         for(y = local_rect.top; y < local_rect.bottom; y++) {
3983             for(x = local_rect.left; x < local_rect.right; x++) {
3984                 /*                      start              lines            pixels      */
3985                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
3986                 const BYTE *green = blue  + 1;
3987                 const BYTE *red = green + 1;
3988
3989                 for(c = 0; c < 256; c++) {
3990                     if(*red   == pal[c].peRed   &&
3991                        *green == pal[c].peGreen &&
3992                        *blue  == pal[c].peBlue)
3993                     {
3994                         *((BYTE *) dest + y * width + x) = c;
3995                         break;
3996                     }
3997                 }
3998             }
3999         }
4000         HeapFree(GetProcessHeap(), 0, mem);
4001     }
4002 }
4003
4004 /* Read the framebuffer contents into a texture */
4005 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
4006 {
4007     struct wined3d_device *device = surface->resource.device;
4008     const struct wined3d_gl_info *gl_info;
4009     struct wined3d_context *context;
4010
4011     if (!surface_is_offscreen(surface))
4012     {
4013         /* We would need to flip onscreen surfaces, but there's no efficient
4014          * way to do that here. It makes more sense for the caller to
4015          * explicitly go through sysmem. */
4016         ERR("Not supported for onscreen targets.\n");
4017         return;
4018     }
4019
4020     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
4021      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
4022      * states in the stateblock, and no driver was found yet that had bugs in that regard.
4023      */
4024     context = context_acquire(device, surface);
4025     gl_info = context->gl_info;
4026     device_invalidate_state(device, STATE_FRAMEBUFFER);
4027
4028     surface_prepare_texture(surface, gl_info, srgb);
4029     surface_bind_and_dirtify(surface, gl_info, srgb);
4030
4031     TRACE("Reading back offscreen render target %p.\n", surface);
4032
4033     ENTER_GL();
4034
4035     glReadBuffer(device->offscreenBuffer);
4036     checkGLcall("glReadBuffer");
4037
4038     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4039             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4040     checkGLcall("glCopyTexSubImage2D");
4041
4042     LEAVE_GL();
4043
4044     context_release(context);
4045 }
4046
4047 /* Context activation is done by the caller. */
4048 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4049         const struct wined3d_gl_info *gl_info, BOOL srgb)
4050 {
4051     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4052     CONVERT_TYPES convert;
4053     struct wined3d_format format;
4054
4055     if (surface->flags & alloc_flag) return;
4056
4057     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4058     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4059     else surface->flags &= ~SFLAG_CONVERTED;
4060
4061     surface_bind_and_dirtify(surface, gl_info, srgb);
4062     surface_allocate_surface(surface, gl_info, &format, srgb);
4063     surface->flags |= alloc_flag;
4064 }
4065
4066 /* Context activation is done by the caller. */
4067 void surface_prepare_texture(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
4068 {
4069     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4070     {
4071         struct wined3d_texture *texture = surface->container.u.texture;
4072         UINT sub_count = texture->level_count * texture->layer_count;
4073         UINT i;
4074
4075         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4076
4077         for (i = 0; i < sub_count; ++i)
4078         {
4079             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4080             surface_prepare_texture_internal(s, gl_info, srgb);
4081         }
4082
4083         return;
4084     }
4085
4086     surface_prepare_texture_internal(surface, gl_info, srgb);
4087 }
4088
4089 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4090         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4091 {
4092     struct wined3d_device *device = surface->resource.device;
4093     UINT pitch = wined3d_surface_get_pitch(surface);
4094     const struct wined3d_gl_info *gl_info;
4095     struct wined3d_context *context;
4096     RECT local_rect;
4097     UINT w, h;
4098
4099     surface_get_rect(surface, rect, &local_rect);
4100
4101     mem += local_rect.top * pitch + local_rect.left * bpp;
4102     w = local_rect.right - local_rect.left;
4103     h = local_rect.bottom - local_rect.top;
4104
4105     /* Activate the correct context for the render target */
4106     context = context_acquire(device, surface);
4107     context_apply_blit_state(context, device);
4108     gl_info = context->gl_info;
4109
4110     ENTER_GL();
4111
4112     if (!surface_is_offscreen(surface))
4113     {
4114         GLenum buffer = surface_get_gl_buffer(surface);
4115         TRACE("Unlocking %#x buffer.\n", buffer);
4116         context_set_draw_buffer(context, buffer);
4117
4118         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4119         glPixelZoom(1.0f, -1.0f);
4120     }
4121     else
4122     {
4123         /* Primary offscreen render target */
4124         TRACE("Offscreen render target.\n");
4125         context_set_draw_buffer(context, device->offscreenBuffer);
4126
4127         glPixelZoom(1.0f, 1.0f);
4128     }
4129
4130     glRasterPos3i(local_rect.left, local_rect.top, 1);
4131     checkGLcall("glRasterPos3i");
4132
4133     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4134     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4135
4136     if (surface->flags & SFLAG_PBO)
4137     {
4138         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4139         checkGLcall("glBindBufferARB");
4140     }
4141
4142     glDrawPixels(w, h, fmt, type, mem);
4143     checkGLcall("glDrawPixels");
4144
4145     if (surface->flags & SFLAG_PBO)
4146     {
4147         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4148         checkGLcall("glBindBufferARB");
4149     }
4150
4151     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4152     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4153
4154     LEAVE_GL();
4155
4156     if (wined3d_settings.strict_draw_ordering
4157             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4158             && surface->container.u.swapchain->front_buffer == surface))
4159         wglFlush();
4160
4161     context_release(context);
4162 }
4163
4164 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4165         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4166 {
4167     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4168     const struct wined3d_device *device = surface->resource.device;
4169     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4170     BOOL blit_supported = FALSE;
4171
4172     /* Copy the default values from the surface. Below we might perform fixups */
4173     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4174     *format = *surface->resource.format;
4175     *convert = NO_CONVERSION;
4176
4177     /* Ok, now look if we have to do any conversion */
4178     switch (surface->resource.format->id)
4179     {
4180         case WINED3DFMT_P8_UINT:
4181             /* Below the call to blit_supported is disabled for Wine 1.2
4182              * because the function isn't operating correctly yet. At the
4183              * moment 8-bit blits are handled in software and if certain GL
4184              * extensions are around, surface conversion is performed at
4185              * upload time. The blit_supported call recognizes it as a
4186              * destination fixup. This type of upload 'fixup' and 8-bit to
4187              * 8-bit blits need to be handled by the blit_shader.
4188              * TODO: get rid of this #if 0. */
4189 #if 0
4190             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4191                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4192                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4193 #endif
4194             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4195
4196             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4197              * texturing. Further also use conversion in case of color keying.
4198              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4199              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4200              * conflicts with this.
4201              */
4202             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4203                     || colorkey_active || !use_texturing)
4204             {
4205                 format->glFormat = GL_RGBA;
4206                 format->glInternal = GL_RGBA;
4207                 format->glType = GL_UNSIGNED_BYTE;
4208                 format->conv_byte_count = 4;
4209                 if (colorkey_active)
4210                     *convert = CONVERT_PALETTED_CK;
4211                 else
4212                     *convert = CONVERT_PALETTED;
4213             }
4214             break;
4215
4216         case WINED3DFMT_B2G3R3_UNORM:
4217             /* **********************
4218                 GL_UNSIGNED_BYTE_3_3_2
4219                 ********************** */
4220             if (colorkey_active) {
4221                 /* This texture format will never be used.. So do not care about color keying
4222                     up until the point in time it will be needed :-) */
4223                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4224             }
4225             break;
4226
4227         case WINED3DFMT_B5G6R5_UNORM:
4228             if (colorkey_active)
4229             {
4230                 *convert = CONVERT_CK_565;
4231                 format->glFormat = GL_RGBA;
4232                 format->glInternal = GL_RGB5_A1;
4233                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4234                 format->conv_byte_count = 2;
4235             }
4236             break;
4237
4238         case WINED3DFMT_B5G5R5X1_UNORM:
4239             if (colorkey_active)
4240             {
4241                 *convert = CONVERT_CK_5551;
4242                 format->glFormat = GL_BGRA;
4243                 format->glInternal = GL_RGB5_A1;
4244                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4245                 format->conv_byte_count = 2;
4246             }
4247             break;
4248
4249         case WINED3DFMT_B8G8R8_UNORM:
4250             if (colorkey_active)
4251             {
4252                 *convert = CONVERT_CK_RGB24;
4253                 format->glFormat = GL_RGBA;
4254                 format->glInternal = GL_RGBA8;
4255                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4256                 format->conv_byte_count = 4;
4257             }
4258             break;
4259
4260         case WINED3DFMT_B8G8R8X8_UNORM:
4261             if (colorkey_active)
4262             {
4263                 *convert = CONVERT_RGB32_888;
4264                 format->glFormat = GL_RGBA;
4265                 format->glInternal = GL_RGBA8;
4266                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4267                 format->conv_byte_count = 4;
4268             }
4269             break;
4270
4271         default:
4272             break;
4273     }
4274
4275     return WINED3D_OK;
4276 }
4277
4278 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4279 {
4280     const struct wined3d_device *device = surface->resource.device;
4281     const struct wined3d_palette *pal = surface->palette;
4282     BOOL index_in_alpha = FALSE;
4283     unsigned int i;
4284
4285     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4286      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4287      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4288      * duplicate entries. Store the color key in the unused alpha component to speed the
4289      * download up and to make conversion unneeded. */
4290     index_in_alpha = primary_render_target_is_p8(device);
4291
4292     if (!pal)
4293     {
4294         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4295         if (device->wined3d->flags & WINED3D_PALETTE_PER_SURFACE)
4296         {
4297             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4298             if (index_in_alpha)
4299             {
4300                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4301                  * there's no palette at this time. */
4302                 for (i = 0; i < 256; i++) table[i][3] = i;
4303             }
4304         }
4305         else
4306         {
4307             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4308              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4309              * capability flag is present (wine does advertise this capability) */
4310             for (i = 0; i < 256; ++i)
4311             {
4312                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4313                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4314                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4315                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4316             }
4317         }
4318     }
4319     else
4320     {
4321         TRACE("Using surface palette %p\n", pal);
4322         /* Get the surface's palette */
4323         for (i = 0; i < 256; ++i)
4324         {
4325             table[i][0] = pal->palents[i].peRed;
4326             table[i][1] = pal->palents[i].peGreen;
4327             table[i][2] = pal->palents[i].peBlue;
4328
4329             /* When index_in_alpha is set the palette index is stored in the
4330              * alpha component. In case of a readback we can then read
4331              * GL_ALPHA. Color keying is handled in BltOverride using a
4332              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4333              * color key itself is passed to glAlphaFunc in other cases the
4334              * alpha component of pixels that should be masked away is set to 0. */
4335             if (index_in_alpha)
4336             {
4337                 table[i][3] = i;
4338             }
4339             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4340                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4341             {
4342                 table[i][3] = 0x00;
4343             }
4344             else if (pal->flags & WINEDDPCAPS_ALPHA)
4345             {
4346                 table[i][3] = pal->palents[i].peFlags;
4347             }
4348             else
4349             {
4350                 table[i][3] = 0xFF;
4351             }
4352         }
4353     }
4354 }
4355
4356 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4357         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4358 {
4359     const BYTE *source;
4360     BYTE *dest;
4361     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4362
4363     switch (convert) {
4364         case NO_CONVERSION:
4365         {
4366             memcpy(dst, src, pitch * height);
4367             break;
4368         }
4369         case CONVERT_PALETTED:
4370         case CONVERT_PALETTED_CK:
4371         {
4372             BYTE table[256][4];
4373             unsigned int x, y;
4374
4375             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4376
4377             for (y = 0; y < height; y++)
4378             {
4379                 source = src + pitch * y;
4380                 dest = dst + outpitch * y;
4381                 /* This is an 1 bpp format, using the width here is fine */
4382                 for (x = 0; x < width; x++) {
4383                     BYTE color = *source++;
4384                     *dest++ = table[color][0];
4385                     *dest++ = table[color][1];
4386                     *dest++ = table[color][2];
4387                     *dest++ = table[color][3];
4388                 }
4389             }
4390         }
4391         break;
4392
4393         case CONVERT_CK_565:
4394         {
4395             /* Converting the 565 format in 5551 packed to emulate color-keying.
4396
4397               Note : in all these conversion, it would be best to average the averaging
4398                       pixels to get the color of the pixel that will be color-keyed to
4399                       prevent 'color bleeding'. This will be done later on if ever it is
4400                       too visible.
4401
4402               Note2: Nvidia documents say that their driver does not support alpha + color keying
4403                      on the same surface and disables color keying in such a case
4404             */
4405             unsigned int x, y;
4406             const WORD *Source;
4407             WORD *Dest;
4408
4409             TRACE("Color keyed 565\n");
4410
4411             for (y = 0; y < height; y++) {
4412                 Source = (const WORD *)(src + y * pitch);
4413                 Dest = (WORD *) (dst + y * outpitch);
4414                 for (x = 0; x < width; x++ ) {
4415                     WORD color = *Source++;
4416                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4417                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4418                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4419                         *Dest |= 0x0001;
4420                     Dest++;
4421                 }
4422             }
4423         }
4424         break;
4425
4426         case CONVERT_CK_5551:
4427         {
4428             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4429             unsigned int x, y;
4430             const WORD *Source;
4431             WORD *Dest;
4432             TRACE("Color keyed 5551\n");
4433             for (y = 0; y < height; y++) {
4434                 Source = (const WORD *)(src + y * pitch);
4435                 Dest = (WORD *) (dst + y * outpitch);
4436                 for (x = 0; x < width; x++ ) {
4437                     WORD color = *Source++;
4438                     *Dest = color;
4439                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4440                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4441                         *Dest |= (1 << 15);
4442                     else
4443                         *Dest &= ~(1 << 15);
4444                     Dest++;
4445                 }
4446             }
4447         }
4448         break;
4449
4450         case CONVERT_CK_RGB24:
4451         {
4452             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4453             unsigned int x, y;
4454             for (y = 0; y < height; y++)
4455             {
4456                 source = src + pitch * y;
4457                 dest = dst + outpitch * y;
4458                 for (x = 0; x < width; x++) {
4459                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4460                     DWORD dstcolor = color << 8;
4461                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4462                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4463                         dstcolor |= 0xff;
4464                     *(DWORD*)dest = dstcolor;
4465                     source += 3;
4466                     dest += 4;
4467                 }
4468             }
4469         }
4470         break;
4471
4472         case CONVERT_RGB32_888:
4473         {
4474             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4475             unsigned int x, y;
4476             for (y = 0; y < height; y++)
4477             {
4478                 source = src + pitch * y;
4479                 dest = dst + outpitch * y;
4480                 for (x = 0; x < width; x++) {
4481                     DWORD color = 0xffffff & *(const DWORD*)source;
4482                     DWORD dstcolor = color << 8;
4483                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4484                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4485                         dstcolor |= 0xff;
4486                     *(DWORD*)dest = dstcolor;
4487                     source += 4;
4488                     dest += 4;
4489                 }
4490             }
4491         }
4492         break;
4493
4494         default:
4495             ERR("Unsupported conversion type %#x.\n", convert);
4496     }
4497     return WINED3D_OK;
4498 }
4499
4500 BOOL palette9_changed(struct wined3d_surface *surface)
4501 {
4502     struct wined3d_device *device = surface->resource.device;
4503
4504     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4505             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4506     {
4507         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4508          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4509          */
4510         return FALSE;
4511     }
4512
4513     if (surface->palette9)
4514     {
4515         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4516         {
4517             return FALSE;
4518         }
4519     }
4520     else
4521     {
4522         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4523     }
4524     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4525
4526     return TRUE;
4527 }
4528
4529 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4530 {
4531     /* Flip the surface contents */
4532     /* Flip the DC */
4533     {
4534         HDC tmp;
4535         tmp = front->hDC;
4536         front->hDC = back->hDC;
4537         back->hDC = tmp;
4538     }
4539
4540     /* Flip the DIBsection */
4541     {
4542         HBITMAP tmp;
4543         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4544         tmp = front->dib.DIBsection;
4545         front->dib.DIBsection = back->dib.DIBsection;
4546         back->dib.DIBsection = tmp;
4547
4548         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4549         else front->flags &= ~SFLAG_DIBSECTION;
4550         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4551         else back->flags &= ~SFLAG_DIBSECTION;
4552     }
4553
4554     /* Flip the surface data */
4555     {
4556         void* tmp;
4557
4558         tmp = front->dib.bitmap_data;
4559         front->dib.bitmap_data = back->dib.bitmap_data;
4560         back->dib.bitmap_data = tmp;
4561
4562         tmp = front->resource.allocatedMemory;
4563         front->resource.allocatedMemory = back->resource.allocatedMemory;
4564         back->resource.allocatedMemory = tmp;
4565
4566         tmp = front->resource.heapMemory;
4567         front->resource.heapMemory = back->resource.heapMemory;
4568         back->resource.heapMemory = tmp;
4569     }
4570
4571     /* Flip the PBO */
4572     {
4573         GLuint tmp_pbo = front->pbo;
4574         front->pbo = back->pbo;
4575         back->pbo = tmp_pbo;
4576     }
4577
4578     /* client_memory should not be different, but just in case */
4579     {
4580         BOOL tmp;
4581         tmp = front->dib.client_memory;
4582         front->dib.client_memory = back->dib.client_memory;
4583         back->dib.client_memory = tmp;
4584     }
4585
4586     /* Flip the opengl texture */
4587     {
4588         GLuint tmp;
4589
4590         tmp = back->texture_name;
4591         back->texture_name = front->texture_name;
4592         front->texture_name = tmp;
4593
4594         tmp = back->texture_name_srgb;
4595         back->texture_name_srgb = front->texture_name_srgb;
4596         front->texture_name_srgb = tmp;
4597
4598         resource_unload(&back->resource);
4599         resource_unload(&front->resource);
4600     }
4601
4602     {
4603         DWORD tmp_flags = back->flags;
4604         back->flags = front->flags;
4605         front->flags = tmp_flags;
4606     }
4607 }
4608
4609 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4610  * pixel copy calls. */
4611 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4612         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4613 {
4614     struct wined3d_device *device = dst_surface->resource.device;
4615     float xrel, yrel;
4616     UINT row;
4617     struct wined3d_context *context;
4618     BOOL upsidedown = FALSE;
4619     RECT dst_rect = *dst_rect_in;
4620
4621     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4622      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4623      */
4624     if(dst_rect.top > dst_rect.bottom) {
4625         UINT tmp = dst_rect.bottom;
4626         dst_rect.bottom = dst_rect.top;
4627         dst_rect.top = tmp;
4628         upsidedown = TRUE;
4629     }
4630
4631     context = context_acquire(device, src_surface);
4632     context_apply_blit_state(context, device);
4633     surface_internal_preload(dst_surface, SRGB_RGB);
4634     ENTER_GL();
4635
4636     /* Bind the target texture */
4637     glBindTexture(dst_surface->texture_target, dst_surface->texture_name);
4638     checkGLcall("glBindTexture");
4639     if (surface_is_offscreen(src_surface))
4640     {
4641         TRACE("Reading from an offscreen target\n");
4642         upsidedown = !upsidedown;
4643         glReadBuffer(device->offscreenBuffer);
4644     }
4645     else
4646     {
4647         glReadBuffer(surface_get_gl_buffer(src_surface));
4648     }
4649     checkGLcall("glReadBuffer");
4650
4651     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4652     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4653
4654     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4655     {
4656         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4657
4658         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4659             ERR("Texture filtering not supported in direct blit\n");
4660         }
4661     }
4662     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4663             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4664     {
4665         ERR("Texture filtering not supported in direct blit\n");
4666     }
4667
4668     if (upsidedown
4669             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4670             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4671     {
4672         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4673
4674         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4675                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4676                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4677                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4678     }
4679     else
4680     {
4681         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4682         /* I have to process this row by row to swap the image,
4683          * otherwise it would be upside down, so stretching in y direction
4684          * doesn't cost extra time
4685          *
4686          * However, stretching in x direction can be avoided if not necessary
4687          */
4688         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4689             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4690             {
4691                 /* Well, that stuff works, but it's very slow.
4692                  * find a better way instead
4693                  */
4694                 UINT col;
4695
4696                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4697                 {
4698                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4699                             dst_rect.left + col /* x offset */, row /* y offset */,
4700                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4701                 }
4702             }
4703             else
4704             {
4705                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4706                         dst_rect.left /* x offset */, row /* y offset */,
4707                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4708             }
4709         }
4710     }
4711     checkGLcall("glCopyTexSubImage2D");
4712
4713     LEAVE_GL();
4714     context_release(context);
4715
4716     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4717      * path is never entered
4718      */
4719     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4720 }
4721
4722 /* Uses the hardware to stretch and flip the image */
4723 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4724         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4725 {
4726     struct wined3d_device *device = dst_surface->resource.device;
4727     struct wined3d_swapchain *src_swapchain = NULL;
4728     GLuint src, backup = 0;
4729     float left, right, top, bottom; /* Texture coordinates */
4730     UINT fbwidth = src_surface->resource.width;
4731     UINT fbheight = src_surface->resource.height;
4732     struct wined3d_context *context;
4733     GLenum drawBuffer = GL_BACK;
4734     GLenum texture_target;
4735     BOOL noBackBufferBackup;
4736     BOOL src_offscreen;
4737     BOOL upsidedown = FALSE;
4738     RECT dst_rect = *dst_rect_in;
4739
4740     TRACE("Using hwstretch blit\n");
4741     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4742     context = context_acquire(device, src_surface);
4743     context_apply_blit_state(context, device);
4744     surface_internal_preload(dst_surface, SRGB_RGB);
4745
4746     src_offscreen = surface_is_offscreen(src_surface);
4747     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4748     if (!noBackBufferBackup && !src_surface->texture_name)
4749     {
4750         /* Get it a description */
4751         surface_internal_preload(src_surface, SRGB_RGB);
4752     }
4753     ENTER_GL();
4754
4755     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4756      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4757      */
4758     if (context->aux_buffers >= 2)
4759     {
4760         /* Got more than one aux buffer? Use the 2nd aux buffer */
4761         drawBuffer = GL_AUX1;
4762     }
4763     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4764     {
4765         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4766         drawBuffer = GL_AUX0;
4767     }
4768
4769     if(noBackBufferBackup) {
4770         glGenTextures(1, &backup);
4771         checkGLcall("glGenTextures");
4772         glBindTexture(GL_TEXTURE_2D, backup);
4773         checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4774         texture_target = GL_TEXTURE_2D;
4775     } else {
4776         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4777          * we are reading from the back buffer, the backup can be used as source texture
4778          */
4779         texture_target = src_surface->texture_target;
4780         glBindTexture(texture_target, src_surface->texture_name);
4781         checkGLcall("glBindTexture(texture_target, src_surface->texture_name)");
4782         glEnable(texture_target);
4783         checkGLcall("glEnable(texture_target)");
4784
4785         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4786         src_surface->flags &= ~SFLAG_INTEXTURE;
4787     }
4788
4789     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4790      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4791      */
4792     if(dst_rect.top > dst_rect.bottom) {
4793         UINT tmp = dst_rect.bottom;
4794         dst_rect.bottom = dst_rect.top;
4795         dst_rect.top = tmp;
4796         upsidedown = TRUE;
4797     }
4798
4799     if (src_offscreen)
4800     {
4801         TRACE("Reading from an offscreen target\n");
4802         upsidedown = !upsidedown;
4803         glReadBuffer(device->offscreenBuffer);
4804     }
4805     else
4806     {
4807         glReadBuffer(surface_get_gl_buffer(src_surface));
4808     }
4809
4810     /* TODO: Only back up the part that will be overwritten */
4811     glCopyTexSubImage2D(texture_target, 0,
4812                         0, 0 /* read offsets */,
4813                         0, 0,
4814                         fbwidth,
4815                         fbheight);
4816
4817     checkGLcall("glCopyTexSubImage2D");
4818
4819     /* No issue with overriding these - the sampler is dirty due to blit usage */
4820     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4821             wined3d_gl_mag_filter(magLookup, Filter));
4822     checkGLcall("glTexParameteri");
4823     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
4824             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
4825     checkGLcall("glTexParameteri");
4826
4827     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4828         src_swapchain = src_surface->container.u.swapchain;
4829     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
4830     {
4831         src = backup ? backup : src_surface->texture_name;
4832     }
4833     else
4834     {
4835         glReadBuffer(GL_FRONT);
4836         checkGLcall("glReadBuffer(GL_FRONT)");
4837
4838         glGenTextures(1, &src);
4839         checkGLcall("glGenTextures(1, &src)");
4840         glBindTexture(GL_TEXTURE_2D, src);
4841         checkGLcall("glBindTexture(GL_TEXTURE_2D, src)");
4842
4843         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
4844          * out for power of 2 sizes
4845          */
4846         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
4847                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
4848         checkGLcall("glTexImage2D");
4849         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
4850                             0, 0 /* read offsets */,
4851                             0, 0,
4852                             fbwidth,
4853                             fbheight);
4854
4855         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
4856         checkGLcall("glTexParameteri");
4857         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
4858         checkGLcall("glTexParameteri");
4859
4860         glReadBuffer(GL_BACK);
4861         checkGLcall("glReadBuffer(GL_BACK)");
4862
4863         if(texture_target != GL_TEXTURE_2D) {
4864             glDisable(texture_target);
4865             glEnable(GL_TEXTURE_2D);
4866             texture_target = GL_TEXTURE_2D;
4867         }
4868     }
4869     checkGLcall("glEnd and previous");
4870
4871     left = src_rect->left;
4872     right = src_rect->right;
4873
4874     if (!upsidedown)
4875     {
4876         top = src_surface->resource.height - src_rect->top;
4877         bottom = src_surface->resource.height - src_rect->bottom;
4878     }
4879     else
4880     {
4881         top = src_surface->resource.height - src_rect->bottom;
4882         bottom = src_surface->resource.height - src_rect->top;
4883     }
4884
4885     if (src_surface->flags & SFLAG_NORMCOORD)
4886     {
4887         left /= src_surface->pow2Width;
4888         right /= src_surface->pow2Width;
4889         top /= src_surface->pow2Height;
4890         bottom /= src_surface->pow2Height;
4891     }
4892
4893     /* draw the source texture stretched and upside down. The correct surface is bound already */
4894     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
4895     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
4896
4897     context_set_draw_buffer(context, drawBuffer);
4898     glReadBuffer(drawBuffer);
4899
4900     glBegin(GL_QUADS);
4901         /* bottom left */
4902         glTexCoord2f(left, bottom);
4903         glVertex2i(0, 0);
4904
4905         /* top left */
4906         glTexCoord2f(left, top);
4907         glVertex2i(0, dst_rect.bottom - dst_rect.top);
4908
4909         /* top right */
4910         glTexCoord2f(right, top);
4911         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4912
4913         /* bottom right */
4914         glTexCoord2f(right, bottom);
4915         glVertex2i(dst_rect.right - dst_rect.left, 0);
4916     glEnd();
4917     checkGLcall("glEnd and previous");
4918
4919     if (texture_target != dst_surface->texture_target)
4920     {
4921         glDisable(texture_target);
4922         glEnable(dst_surface->texture_target);
4923         texture_target = dst_surface->texture_target;
4924     }
4925
4926     /* Now read the stretched and upside down image into the destination texture */
4927     glBindTexture(texture_target, dst_surface->texture_name);
4928     checkGLcall("glBindTexture");
4929     glCopyTexSubImage2D(texture_target,
4930                         0,
4931                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
4932                         0, 0, /* We blitted the image to the origin */
4933                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4934     checkGLcall("glCopyTexSubImage2D");
4935
4936     if(drawBuffer == GL_BACK) {
4937         /* Write the back buffer backup back */
4938         if(backup) {
4939             if(texture_target != GL_TEXTURE_2D) {
4940                 glDisable(texture_target);
4941                 glEnable(GL_TEXTURE_2D);
4942                 texture_target = GL_TEXTURE_2D;
4943             }
4944             glBindTexture(GL_TEXTURE_2D, backup);
4945             checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4946         }
4947         else
4948         {
4949             if (texture_target != src_surface->texture_target)
4950             {
4951                 glDisable(texture_target);
4952                 glEnable(src_surface->texture_target);
4953                 texture_target = src_surface->texture_target;
4954             }
4955             glBindTexture(src_surface->texture_target, src_surface->texture_name);
4956             checkGLcall("glBindTexture(src_surface->texture_target, src_surface->texture_name)");
4957         }
4958
4959         glBegin(GL_QUADS);
4960             /* top left */
4961             glTexCoord2f(0.0f, 0.0f);
4962             glVertex2i(0, fbheight);
4963
4964             /* bottom left */
4965             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
4966             glVertex2i(0, 0);
4967
4968             /* bottom right */
4969             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
4970                     (float)fbheight / (float)src_surface->pow2Height);
4971             glVertex2i(fbwidth, 0);
4972
4973             /* top right */
4974             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
4975             glVertex2i(fbwidth, fbheight);
4976         glEnd();
4977     }
4978     glDisable(texture_target);
4979     checkGLcall("glDisable(texture_target)");
4980
4981     /* Cleanup */
4982     if (src != src_surface->texture_name && src != backup)
4983     {
4984         glDeleteTextures(1, &src);
4985         checkGLcall("glDeleteTextures(1, &src)");
4986     }
4987     if(backup) {
4988         glDeleteTextures(1, &backup);
4989         checkGLcall("glDeleteTextures(1, &backup)");
4990     }
4991
4992     LEAVE_GL();
4993
4994     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
4995
4996     context_release(context);
4997
4998     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4999      * path is never entered
5000      */
5001     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5002 }
5003
5004 /* Front buffer coordinates are always full screen coordinates, but our GL
5005  * drawable is limited to the window's client area. The sysmem and texture
5006  * copies do have the full screen size. Note that GL has a bottom-left
5007  * origin, while D3D has a top-left origin. */
5008 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5009 {
5010     UINT drawable_height;
5011
5012     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5013             && surface == surface->container.u.swapchain->front_buffer)
5014     {
5015         POINT offset = {0, 0};
5016         RECT windowsize;
5017
5018         ScreenToClient(window, &offset);
5019         OffsetRect(rect, offset.x, offset.y);
5020
5021         GetClientRect(window, &windowsize);
5022         drawable_height = windowsize.bottom - windowsize.top;
5023     }
5024     else
5025     {
5026         drawable_height = surface->resource.height;
5027     }
5028
5029     rect->top = drawable_height - rect->top;
5030     rect->bottom = drawable_height - rect->bottom;
5031 }
5032
5033 /* blit between surface locations. onscreen on different swapchains is not supported.
5034  * depth / stencil is not supported. */
5035 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
5036         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
5037         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
5038 {
5039     const struct wined3d_gl_info *gl_info;
5040     struct wined3d_context *context;
5041     RECT src_rect, dst_rect;
5042     GLenum gl_filter;
5043     GLenum buffer;
5044
5045     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
5046     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
5047             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
5048     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
5049             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
5050
5051     src_rect = *src_rect_in;
5052     dst_rect = *dst_rect_in;
5053
5054     switch (filter)
5055     {
5056         case WINED3DTEXF_LINEAR:
5057             gl_filter = GL_LINEAR;
5058             break;
5059
5060         default:
5061             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
5062         case WINED3DTEXF_NONE:
5063         case WINED3DTEXF_POINT:
5064             gl_filter = GL_NEAREST;
5065             break;
5066     }
5067
5068     if (src_location == SFLAG_INDRAWABLE && surface_is_offscreen(src_surface))
5069         src_location = SFLAG_INTEXTURE;
5070     if (dst_location == SFLAG_INDRAWABLE && surface_is_offscreen(dst_surface))
5071         dst_location = SFLAG_INTEXTURE;
5072
5073     /* Make sure the locations are up-to-date. Loading the destination
5074      * surface isn't required if the entire surface is overwritten. (And is
5075      * in fact harmful if we're being called by surface_load_location() with
5076      * the purpose of loading the destination surface.) */
5077     surface_load_location(src_surface, src_location, NULL);
5078     if (!surface_is_full_rect(dst_surface, &dst_rect))
5079         surface_load_location(dst_surface, dst_location, NULL);
5080
5081     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
5082     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
5083     else context = context_acquire(device, NULL);
5084
5085     if (!context->valid)
5086     {
5087         context_release(context);
5088         WARN("Invalid context, skipping blit.\n");
5089         return;
5090     }
5091
5092     gl_info = context->gl_info;
5093
5094     if (src_location == SFLAG_INDRAWABLE)
5095     {
5096         TRACE("Source surface %p is onscreen.\n", src_surface);
5097         buffer = surface_get_gl_buffer(src_surface);
5098         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
5099     }
5100     else
5101     {
5102         TRACE("Source surface %p is offscreen.\n", src_surface);
5103         buffer = GL_COLOR_ATTACHMENT0;
5104     }
5105
5106     ENTER_GL();
5107     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
5108     glReadBuffer(buffer);
5109     checkGLcall("glReadBuffer()");
5110     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
5111     LEAVE_GL();
5112
5113     if (dst_location == SFLAG_INDRAWABLE)
5114     {
5115         TRACE("Destination surface %p is onscreen.\n", dst_surface);
5116         buffer = surface_get_gl_buffer(dst_surface);
5117         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5118     }
5119     else
5120     {
5121         TRACE("Destination surface %p is offscreen.\n", dst_surface);
5122         buffer = GL_COLOR_ATTACHMENT0;
5123     }
5124
5125     ENTER_GL();
5126     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
5127     context_set_draw_buffer(context, buffer);
5128     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
5129     context_invalidate_state(context, STATE_FRAMEBUFFER);
5130
5131     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
5132     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
5133     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
5134     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
5135     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
5136
5137     glDisable(GL_SCISSOR_TEST);
5138     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
5139
5140     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
5141             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
5142     checkGLcall("glBlitFramebuffer()");
5143
5144     LEAVE_GL();
5145
5146     if (wined3d_settings.strict_draw_ordering
5147             || (dst_location == SFLAG_INDRAWABLE
5148             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
5149         wglFlush();
5150
5151     context_release(context);
5152 }
5153
5154 static void surface_blt_to_drawable(struct wined3d_device *device,
5155         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5156         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5157         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5158 {
5159     struct wined3d_context *context;
5160     RECT src_rect, dst_rect;
5161
5162     src_rect = *src_rect_in;
5163     dst_rect = *dst_rect_in;
5164
5165     /* Make sure the surface is up-to-date. This should probably use
5166      * surface_load_location() and worry about the destination surface too,
5167      * unless we're overwriting it completely. */
5168     surface_internal_preload(src_surface, SRGB_RGB);
5169
5170     /* Activate the destination context, set it up for blitting */
5171     context = context_acquire(device, dst_surface);
5172     context_apply_blit_state(context, device);
5173
5174     if (!surface_is_offscreen(dst_surface))
5175         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5176
5177     device->blitter->set_shader(device->blit_priv, context->gl_info, src_surface);
5178
5179     ENTER_GL();
5180
5181     if (color_key)
5182     {
5183         glEnable(GL_ALPHA_TEST);
5184         checkGLcall("glEnable(GL_ALPHA_TEST)");
5185
5186         /* When the primary render target uses P8, the alpha component
5187          * contains the palette index. Which means that the colorkey is one of
5188          * the palette entries. In other cases pixels that should be masked
5189          * away have alpha set to 0. */
5190         if (primary_render_target_is_p8(device))
5191             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5192         else
5193             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5194         checkGLcall("glAlphaFunc");
5195     }
5196     else
5197     {
5198         glDisable(GL_ALPHA_TEST);
5199         checkGLcall("glDisable(GL_ALPHA_TEST)");
5200     }
5201
5202     draw_textured_quad(src_surface, &src_rect, &dst_rect, filter);
5203
5204     if (color_key)
5205     {
5206         glDisable(GL_ALPHA_TEST);
5207         checkGLcall("glDisable(GL_ALPHA_TEST)");
5208     }
5209
5210     LEAVE_GL();
5211
5212     /* Leave the opengl state valid for blitting */
5213     device->blitter->unset_shader(context->gl_info);
5214
5215     if (wined3d_settings.strict_draw_ordering
5216             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5217             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5218         wglFlush(); /* Flush to ensure ordering across contexts. */
5219
5220     context_release(context);
5221 }
5222
5223 /* Do not call while under the GL lock. */
5224 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5225 {
5226     struct wined3d_device *device = s->resource.device;
5227     const struct blit_shader *blitter;
5228
5229     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5230             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5231     if (!blitter)
5232     {
5233         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5234         return WINED3DERR_INVALIDCALL;
5235     }
5236
5237     return blitter->color_fill(device, s, rect, color);
5238 }
5239
5240 /* Do not call while under the GL lock. */
5241 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5242         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5243         WINED3DTEXTUREFILTERTYPE Filter)
5244 {
5245     struct wined3d_device *device = dst_surface->resource.device;
5246     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5247     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5248
5249     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5250             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5251             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5252
5253     /* Get the swapchain. One of the surfaces has to be a primary surface */
5254     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5255     {
5256         WARN("Destination is in sysmem, rejecting gl blt\n");
5257         return WINED3DERR_INVALIDCALL;
5258     }
5259
5260     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5261         dstSwapchain = dst_surface->container.u.swapchain;
5262
5263     if (src_surface)
5264     {
5265         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5266         {
5267             WARN("Src is in sysmem, rejecting gl blt\n");
5268             return WINED3DERR_INVALIDCALL;
5269         }
5270
5271         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5272             srcSwapchain = src_surface->container.u.swapchain;
5273     }
5274
5275     /* Early sort out of cases where no render target is used */
5276     if (!dstSwapchain && !srcSwapchain
5277             && src_surface != device->fb.render_targets[0]
5278             && dst_surface != device->fb.render_targets[0])
5279     {
5280         TRACE("No surface is render target, not using hardware blit.\n");
5281         return WINED3DERR_INVALIDCALL;
5282     }
5283
5284     /* No destination color keying supported */
5285     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5286     {
5287         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5288         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5289         return WINED3DERR_INVALIDCALL;
5290     }
5291
5292     /* The only case where both surfaces on a swapchain are supported is a back buffer -> front buffer blit on the same swapchain */
5293     if (dstSwapchain && dstSwapchain == srcSwapchain && dstSwapchain->back_buffers
5294             && dst_surface == dstSwapchain->front_buffer
5295             && src_surface == dstSwapchain->back_buffers[0])
5296     {
5297         /* Half-Life does a Blt from the back buffer to the front buffer,
5298          * Full surface size, no flags... Use present instead
5299          *
5300          * This path will only be entered for d3d7 and ddraw apps, because d3d8/9 offer no way to blit TO the front buffer
5301          */
5302
5303         /* Check rects - wined3d_swapchain_present() doesn't handle them. */
5304         for (;;)
5305         {
5306             TRACE("Looking if a Present can be done...\n");
5307             /* Source Rectangle must be full surface */
5308             if (src_rect->left || src_rect->top
5309                     || src_rect->right != src_surface->resource.width
5310                     || src_rect->bottom != src_surface->resource.height)
5311             {
5312                 TRACE("No, Source rectangle doesn't match\n");
5313                 break;
5314             }
5315
5316             /* No stretching may occur */
5317             if (src_rect->right != dst_rect->right - dst_rect->left
5318                     || src_rect->bottom != dst_rect->bottom - dst_rect->top)
5319             {
5320                 TRACE("No, stretching is done\n");
5321                 break;
5322             }
5323
5324             /* Destination must be full surface or match the clipping rectangle */
5325             if (dst_surface->clipper && dst_surface->clipper->hWnd)
5326             {
5327                 RECT cliprect;
5328                 POINT pos[2];
5329                 GetClientRect(dst_surface->clipper->hWnd, &cliprect);
5330                 pos[0].x = dst_rect->left;
5331                 pos[0].y = dst_rect->top;
5332                 pos[1].x = dst_rect->right;
5333                 pos[1].y = dst_rect->bottom;
5334                 MapWindowPoints(GetDesktopWindow(), dst_surface->clipper->hWnd, pos, 2);
5335
5336                 if (pos[0].x != cliprect.left || pos[0].y != cliprect.top
5337                         || pos[1].x != cliprect.right || pos[1].y != cliprect.bottom)
5338                 {
5339                     TRACE("No, dest rectangle doesn't match(clipper)\n");
5340                     TRACE("Clip rect at %s\n", wine_dbgstr_rect(&cliprect));
5341                     TRACE("Blt dest: %s\n", wine_dbgstr_rect(dst_rect));
5342                     break;
5343                 }
5344             }
5345             else if (dst_rect->left || dst_rect->top
5346                     || dst_rect->right != dst_surface->resource.width
5347                     || dst_rect->bottom != dst_surface->resource.height)
5348             {
5349                 TRACE("No, dest rectangle doesn't match(surface size)\n");
5350                 break;
5351             }
5352
5353             TRACE("Yes\n");
5354
5355             /* These flags are unimportant for the flag check, remove them */
5356             if (!(flags & ~(WINEDDBLT_DONOTWAIT | WINEDDBLT_WAIT)))
5357             {
5358                 WINED3DSWAPEFFECT orig_swap = dstSwapchain->presentParms.SwapEffect;
5359
5360                 /* The idea behind this is that a glReadPixels and a glDrawPixels call
5361                     * take very long, while a flip is fast.
5362                     * This applies to Half-Life, which does such Blts every time it finished
5363                     * a frame, and to Prince of Persia 3D, which uses this to draw at least the main
5364                     * menu. This is also used by all apps when they do windowed rendering
5365                     *
5366                     * The problem is that flipping is not really the same as copying. After a
5367                     * Blt the front buffer is a copy of the back buffer, and the back buffer is
5368                     * untouched. Therefore it's necessary to override the swap effect
5369                     * and to set it back after the flip.
5370                     *
5371                     * Windowed Direct3D < 7 apps do the same. The D3D7 sdk demos are nice
5372                     * testcases.
5373                     */
5374
5375                 dstSwapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
5376                 dstSwapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
5377
5378                 TRACE("Full screen back buffer -> front buffer blt, performing a flip instead.\n");
5379                 wined3d_swapchain_present(dstSwapchain, NULL, NULL, dstSwapchain->win_handle, NULL, 0);
5380
5381                 dstSwapchain->presentParms.SwapEffect = orig_swap;
5382
5383                 return WINED3D_OK;
5384             }
5385             break;
5386         }
5387
5388         TRACE("Unsupported blit between buffers on the same swapchain\n");
5389         return WINED3DERR_INVALIDCALL;
5390     } else if(dstSwapchain && dstSwapchain == srcSwapchain) {
5391         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5392         return WINED3DERR_INVALIDCALL;
5393     } else if(dstSwapchain && srcSwapchain) {
5394         FIXME("Implement hardware blit between two different swapchains\n");
5395         return WINED3DERR_INVALIDCALL;
5396     }
5397     else if (dstSwapchain)
5398     {
5399         /* Handled with regular texture -> swapchain blit */
5400         if (src_surface == device->fb.render_targets[0])
5401             TRACE("Blit from active render target to a swapchain\n");
5402     }
5403     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5404     {
5405         FIXME("Implement blit from a swapchain to the active render target\n");
5406         return WINED3DERR_INVALIDCALL;
5407     }
5408
5409     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5410     {
5411         /* Blit from render target to texture */
5412         BOOL stretchx;
5413
5414         /* P8 read back is not implemented */
5415         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5416                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5417         {
5418             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5419             return WINED3DERR_INVALIDCALL;
5420         }
5421
5422         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5423         {
5424             TRACE("Color keying not supported by frame buffer to texture blit\n");
5425             return WINED3DERR_INVALIDCALL;
5426             /* Destination color key is checked above */
5427         }
5428
5429         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5430             stretchx = TRUE;
5431         else
5432             stretchx = FALSE;
5433
5434         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5435          * flip the image nor scale it.
5436          *
5437          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5438          * -> If the app wants a image width an unscaled width, copy it line per line
5439          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5440          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5441          *    back buffer. This is slower than reading line per line, thus not used for flipping
5442          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5443          *    pixel by pixel
5444          *
5445          * If EXT_framebuffer_blit is supported that can be used instead. Note that EXT_framebuffer_blit implies
5446          * FBO support, so it doesn't really make sense to try and make it work with different offscreen rendering
5447          * backends. */
5448         if (fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5449                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5450                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5451         {
5452             surface_blt_fbo(device, Filter,
5453                     src_surface, SFLAG_INDRAWABLE, src_rect,
5454                     dst_surface, SFLAG_INDRAWABLE, dst_rect);
5455             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5456         }
5457         else if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5458                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5459         {
5460             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5461             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5462         } else {
5463             TRACE("Using hardware stretching to flip / stretch the texture\n");
5464             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5465         }
5466
5467         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5468         {
5469             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5470             dst_surface->resource.allocatedMemory = NULL;
5471             dst_surface->resource.heapMemory = NULL;
5472         }
5473         else
5474         {
5475             dst_surface->flags &= ~SFLAG_INSYSMEM;
5476         }
5477
5478         return WINED3D_OK;
5479     }
5480     else if (src_surface)
5481     {
5482         /* Blit from offscreen surface to render target */
5483         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5484         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5485
5486         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5487
5488         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5489                 && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5490                         src_rect, src_surface->resource.usage, src_surface->resource.pool,
5491                         src_surface->resource.format,
5492                         dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5493                         dst_surface->resource.format))
5494         {
5495             TRACE("Using surface_blt_fbo.\n");
5496             /* The source is always a texture, but never the currently active render target, and the texture
5497              * contents are never upside down. */
5498             surface_blt_fbo(device, Filter,
5499                     src_surface, SFLAG_INDRAWABLE, src_rect,
5500                     dst_surface, SFLAG_INDRAWABLE, dst_rect);
5501             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5502             return WINED3D_OK;
5503         }
5504
5505         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5506                 && arbfp_blit.blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5507                         src_rect, src_surface->resource.usage, src_surface->resource.pool,
5508                         src_surface->resource.format,
5509                         dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5510                         dst_surface->resource.format))
5511             return arbfp_blit_surface(device, Filter, src_surface, src_rect, dst_surface, dst_rect);
5512
5513         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5514                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5515                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5516         {
5517             FIXME("Unsupported blit operation falling back to software\n");
5518             return WINED3DERR_INVALIDCALL;
5519         }
5520
5521         /* Color keying: Check if we have to do a color keyed blt,
5522          * and if not check if a color key is activated.
5523          *
5524          * Just modify the color keying parameters in the surface and restore them afterwards
5525          * The surface keeps track of the color key last used to load the opengl surface.
5526          * PreLoad will catch the change to the flags and color key and reload if necessary.
5527          */
5528         if (flags & WINEDDBLT_KEYSRC)
5529         {
5530             /* Use color key from surface */
5531         }
5532         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5533         {
5534             /* Use color key from DDBltFx */
5535             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5536             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5537         }
5538         else
5539         {
5540             /* Do not use color key */
5541             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5542         }
5543
5544         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5545                 src_surface, src_rect, dst_surface, dst_rect);
5546
5547         /* Restore the color key parameters */
5548         src_surface->CKeyFlags = oldCKeyFlags;
5549         src_surface->SrcBltCKey = oldBltCKey;
5550
5551         surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5552
5553         return WINED3D_OK;
5554     }
5555     else
5556     {
5557         /* Source-Less Blit to render target */
5558         if (flags & WINEDDBLT_COLORFILL)
5559         {
5560             WINED3DCOLORVALUE color;
5561
5562             TRACE("Colorfill\n");
5563
5564             /* The color as given in the Blt function is in the surface format. */
5565             if (!surface_convert_color_to_float(dst_surface, DDBltFx->u5.dwFillColor, &color))
5566                 return WINED3DERR_INVALIDCALL;
5567
5568             return surface_color_fill(dst_surface, dst_rect, &color);
5569         }
5570     }
5571
5572     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5573     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5574     return WINED3DERR_INVALIDCALL;
5575 }
5576
5577 /* GL locking is done by the caller */
5578 static void surface_depth_blt(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
5579         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5580 {
5581     struct wined3d_device *device = surface->resource.device;
5582     GLint compare_mode = GL_NONE;
5583     struct blt_info info;
5584     GLint old_binding = 0;
5585     RECT rect;
5586
5587     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5588
5589     glDisable(GL_CULL_FACE);
5590     glDisable(GL_BLEND);
5591     glDisable(GL_ALPHA_TEST);
5592     glDisable(GL_SCISSOR_TEST);
5593     glDisable(GL_STENCIL_TEST);
5594     glEnable(GL_DEPTH_TEST);
5595     glDepthFunc(GL_ALWAYS);
5596     glDepthMask(GL_TRUE);
5597     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5598     glViewport(x, y, w, h);
5599
5600     SetRect(&rect, 0, h, w, 0);
5601     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5602     GL_EXTCALL(glActiveTextureARB(GL_TEXTURE0_ARB));
5603     glGetIntegerv(info.binding, &old_binding);
5604     glBindTexture(info.bind_target, texture);
5605     if (gl_info->supported[ARB_SHADOW])
5606     {
5607         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5608         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5609     }
5610
5611     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5612             gl_info, info.tex_type, &surface->ds_current_size);
5613
5614     glBegin(GL_TRIANGLE_STRIP);
5615     glTexCoord3fv(info.coords[0]);
5616     glVertex2f(-1.0f, -1.0f);
5617     glTexCoord3fv(info.coords[1]);
5618     glVertex2f(1.0f, -1.0f);
5619     glTexCoord3fv(info.coords[2]);
5620     glVertex2f(-1.0f, 1.0f);
5621     glTexCoord3fv(info.coords[3]);
5622     glVertex2f(1.0f, 1.0f);
5623     glEnd();
5624
5625     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5626     glBindTexture(info.bind_target, old_binding);
5627
5628     glPopAttrib();
5629
5630     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5631 }
5632
5633 void surface_modify_ds_location(struct wined3d_surface *surface,
5634         DWORD location, UINT w, UINT h)
5635 {
5636     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5637
5638     if (location & ~SFLAG_DS_LOCATIONS)
5639         FIXME("Invalid location (%#x) specified.\n", location);
5640
5641     surface->ds_current_size.cx = w;
5642     surface->ds_current_size.cy = h;
5643     surface->flags &= ~SFLAG_DS_LOCATIONS;
5644     surface->flags |= location;
5645 }
5646
5647 /* Context activation is done by the caller. */
5648 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5649 {
5650     struct wined3d_device *device = surface->resource.device;
5651     const struct wined3d_gl_info *gl_info = context->gl_info;
5652     GLsizei w, h;
5653
5654     TRACE("surface %p, new location %#x.\n", surface, location);
5655
5656     /* TODO: Make this work for modes other than FBO */
5657     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5658
5659     if (!(surface->flags & location))
5660     {
5661         w = surface->ds_current_size.cx;
5662         h = surface->ds_current_size.cy;
5663         surface->ds_current_size.cx = 0;
5664         surface->ds_current_size.cy = 0;
5665     }
5666     else
5667     {
5668         w = surface->resource.width;
5669         h = surface->resource.height;
5670     }
5671
5672     if (surface->ds_current_size.cx == surface->resource.width
5673             && surface->ds_current_size.cy == surface->resource.height)
5674     {
5675         TRACE("Location (%#x) is already up to date.\n", location);
5676         return;
5677     }
5678
5679     if (surface->current_renderbuffer)
5680     {
5681         FIXME("Not supported with fixed up depth stencil.\n");
5682         return;
5683     }
5684
5685     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5686     {
5687         /* This mostly happens when a depth / stencil is used without being
5688          * cleared first. In principle we could upload from sysmem, or
5689          * explicitly clear before first usage. For the moment there don't
5690          * appear to be a lot of applications depending on this, so a FIXME
5691          * should do. */
5692         FIXME("No up to date depth stencil location.\n");
5693         surface->flags |= location;
5694         surface->ds_current_size.cx = surface->resource.width;
5695         surface->ds_current_size.cy = surface->resource.height;
5696         return;
5697     }
5698
5699     if (location == SFLAG_DS_OFFSCREEN)
5700     {
5701         GLint old_binding = 0;
5702         GLenum bind_target;
5703
5704         /* The render target is allowed to be smaller than the depth/stencil
5705          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5706          * than the offscreen surface. Don't overwrite the offscreen surface
5707          * with undefined data. */
5708         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5709         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5710
5711         TRACE("Copying onscreen depth buffer to depth texture.\n");
5712
5713         ENTER_GL();
5714
5715         if (!device->depth_blt_texture)
5716         {
5717             glGenTextures(1, &device->depth_blt_texture);
5718         }
5719
5720         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5721          * directly on the FBO texture. That's because we need to flip. */
5722         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5723                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5724         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5725         {
5726             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5727             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5728         }
5729         else
5730         {
5731             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5732             bind_target = GL_TEXTURE_2D;
5733         }
5734         glBindTexture(bind_target, device->depth_blt_texture);
5735         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5736          * internal format, because the internal format might include stencil
5737          * data. In principle we should copy stencil data as well, but unless
5738          * the driver supports stencil export it's hard to do, and doesn't
5739          * seem to be needed in practice. If the hardware doesn't support
5740          * writing stencil data, the glCopyTexImage2D() call might trigger
5741          * software fallbacks. */
5742         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5743         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5744         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5745         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5746         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5747         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5748         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5749         glBindTexture(bind_target, old_binding);
5750
5751         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5752                 NULL, surface, SFLAG_INTEXTURE);
5753         context_set_draw_buffer(context, GL_NONE);
5754         glReadBuffer(GL_NONE);
5755
5756         /* Do the actual blit */
5757         surface_depth_blt(surface, gl_info, device->depth_blt_texture, 0, 0, w, h, bind_target);
5758         checkGLcall("depth_blt");
5759
5760         context_invalidate_state(context, STATE_FRAMEBUFFER);
5761
5762         LEAVE_GL();
5763
5764         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5765     }
5766     else if (location == SFLAG_DS_ONSCREEN)
5767     {
5768         TRACE("Copying depth texture to onscreen depth buffer.\n");
5769
5770         ENTER_GL();
5771
5772         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5773                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5774         surface_depth_blt(surface, gl_info, surface->texture_name,
5775                 0, surface->pow2Height - h, w, h, surface->texture_target);
5776         checkGLcall("depth_blt");
5777
5778         context_invalidate_state(context, STATE_FRAMEBUFFER);
5779
5780         LEAVE_GL();
5781
5782         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5783     }
5784     else
5785     {
5786         ERR("Invalid location (%#x) specified.\n", location);
5787     }
5788
5789     surface->flags |= location;
5790     surface->ds_current_size.cx = surface->resource.width;
5791     surface->ds_current_size.cy = surface->resource.height;
5792 }
5793
5794 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5795 {
5796     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5797     struct wined3d_surface *overlay;
5798
5799     TRACE("surface %p, location %s, persistent %#x.\n",
5800             surface, debug_surflocation(location), persistent);
5801
5802     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
5803     {
5804         if (surface_is_offscreen(surface))
5805         {
5806             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same
5807              * for offscreen targets. */
5808             if (location & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE))
5809                 location |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
5810         }
5811         else
5812         {
5813             TRACE("Surface %p is an onscreen surface.\n", surface);
5814         }
5815     }
5816
5817     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5818             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5819         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5820
5821     if (persistent)
5822     {
5823         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5824                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5825         {
5826             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5827             {
5828                 TRACE("Passing to container.\n");
5829                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5830             }
5831         }
5832         surface->flags &= ~SFLAG_LOCATIONS;
5833         surface->flags |= location;
5834
5835         /* Redraw emulated overlays, if any */
5836         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5837         {
5838             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5839             {
5840                 overlay->surface_ops->surface_draw_overlay(overlay);
5841             }
5842         }
5843     }
5844     else
5845     {
5846         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5847         {
5848             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5849             {
5850                 TRACE("Passing to container\n");
5851                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5852             }
5853         }
5854         surface->flags &= ~location;
5855     }
5856
5857     if (!(surface->flags & SFLAG_LOCATIONS))
5858     {
5859         ERR("Surface %p does not have any up to date location.\n", surface);
5860     }
5861 }
5862
5863 static DWORD resource_access_from_location(DWORD location)
5864 {
5865     switch (location)
5866     {
5867         case SFLAG_INSYSMEM:
5868             return WINED3D_RESOURCE_ACCESS_CPU;
5869
5870         case SFLAG_INDRAWABLE:
5871         case SFLAG_INSRGBTEX:
5872         case SFLAG_INTEXTURE:
5873             return WINED3D_RESOURCE_ACCESS_GPU;
5874
5875         default:
5876             FIXME("Unhandled location %#x.\n", location);
5877             return 0;
5878     }
5879 }
5880
5881 static void surface_load_sysmem(struct wined3d_surface *surface,
5882         const struct wined3d_gl_info *gl_info, const RECT *rect)
5883 {
5884     surface_prepare_system_memory(surface);
5885
5886     /* Download the surface to system memory. */
5887     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5888     {
5889         struct wined3d_device *device = surface->resource.device;
5890         struct wined3d_context *context = NULL;
5891
5892         if (!device->isInDraw)
5893             context = context_acquire(device, NULL);
5894
5895         surface_bind_and_dirtify(surface, gl_info, !(surface->flags & SFLAG_INTEXTURE));
5896         surface_download_data(surface, gl_info);
5897
5898         if (context)
5899             context_release(context);
5900
5901         return;
5902     }
5903
5904     /* Note: It might be faster to download into a texture first. */
5905     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5906             wined3d_surface_get_pitch(surface));
5907 }
5908
5909 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5910         const struct wined3d_gl_info *gl_info, const RECT *rect)
5911 {
5912     struct wined3d_device *device = surface->resource.device;
5913     struct wined3d_format format;
5914     CONVERT_TYPES convert;
5915     UINT byte_count;
5916     BYTE *mem;
5917
5918     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5919         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5920
5921     if (surface->flags & SFLAG_INTEXTURE)
5922     {
5923         RECT r;
5924
5925         surface_get_rect(surface, rect, &r);
5926         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5927
5928         return WINED3D_OK;
5929     }
5930
5931     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5932     {
5933         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5934          * path through sysmem. */
5935         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5936     }
5937
5938     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5939
5940     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5941      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5942      * called. */
5943     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5944     {
5945         struct wined3d_context *context = NULL;
5946
5947         TRACE("Removing the pbo attached to surface %p.\n", surface);
5948
5949         if (!device->isInDraw)
5950             context = context_acquire(device, NULL);
5951
5952         surface_remove_pbo(surface, gl_info);
5953
5954         if (context)
5955             context_release(context);
5956     }
5957
5958     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5959     {
5960         UINT height = surface->resource.height;
5961         UINT width = surface->resource.width;
5962         UINT src_pitch, dst_pitch;
5963
5964         byte_count = format.conv_byte_count;
5965         src_pitch = wined3d_surface_get_pitch(surface);
5966
5967         /* Stick to the alignment for the converted surface too, makes it
5968          * easier to load the surface. */
5969         dst_pitch = width * byte_count;
5970         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5971
5972         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5973         {
5974             ERR("Out of memory (%u).\n", dst_pitch * height);
5975             return E_OUTOFMEMORY;
5976         }
5977
5978         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5979                 src_pitch, width, height, dst_pitch, convert, surface);
5980
5981         surface->flags |= SFLAG_CONVERTED;
5982     }
5983     else
5984     {
5985         surface->flags &= ~SFLAG_CONVERTED;
5986         mem = surface->resource.allocatedMemory;
5987         byte_count = format.byte_count;
5988     }
5989
5990     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5991
5992     /* Don't delete PBO memory. */
5993     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5994         HeapFree(GetProcessHeap(), 0, mem);
5995
5996     return WINED3D_OK;
5997 }
5998
5999 static HRESULT surface_load_texture(struct wined3d_surface *surface,
6000         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
6001 {
6002     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
6003     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
6004     struct wined3d_device *device = surface->resource.device;
6005     struct wined3d_context *context = NULL;
6006     UINT width, src_pitch, dst_pitch;
6007     struct wined3d_bo_address data;
6008     struct wined3d_format format;
6009     POINT dst_point = {0, 0};
6010     CONVERT_TYPES convert;
6011     BYTE *mem;
6012
6013     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6014             && surface_is_offscreen(surface)
6015             && (surface->flags & SFLAG_INDRAWABLE))
6016     {
6017         read_from_framebuffer_texture(surface, srgb);
6018
6019         return WINED3D_OK;
6020     }
6021
6022     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6023             && (surface->resource.format->flags & attach_flags) == attach_flags
6024             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6025                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6026                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6027     {
6028         if (srgb)
6029             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
6030                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6031         else
6032             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
6033                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6034
6035         return WINED3D_OK;
6036     }
6037
6038     /* Upload from system memory */
6039
6040     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6041             TRUE /* We will use textures */, &format, &convert);
6042
6043     if (srgb)
6044     {
6045         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6046         {
6047             /* Performance warning... */
6048             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6049             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6050         }
6051     }
6052     else
6053     {
6054         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6055         {
6056             /* Performance warning... */
6057             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6058             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6059         }
6060     }
6061
6062     if (!(surface->flags & SFLAG_INSYSMEM))
6063     {
6064         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6065         /* Lets hope we get it from somewhere... */
6066         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6067     }
6068
6069     if (!device->isInDraw)
6070         context = context_acquire(device, NULL);
6071
6072     surface_prepare_texture(surface, gl_info, srgb);
6073     surface_bind_and_dirtify(surface, gl_info, srgb);
6074
6075     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6076     {
6077         surface->flags |= SFLAG_GLCKEY;
6078         surface->glCKey = surface->SrcBltCKey;
6079     }
6080     else surface->flags &= ~SFLAG_GLCKEY;
6081
6082     width = surface->resource.width;
6083     src_pitch = wined3d_surface_get_pitch(surface);
6084
6085     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6086      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6087      * called. */
6088     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6089     {
6090         TRACE("Removing the pbo attached to surface %p.\n", surface);
6091         surface_remove_pbo(surface, gl_info);
6092     }
6093
6094     if (format.convert)
6095     {
6096         /* This code is entered for texture formats which need a fixup. */
6097         UINT height = surface->resource.height;
6098
6099         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6100         dst_pitch = width * format.conv_byte_count;
6101         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6102
6103         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6104         {
6105             ERR("Out of memory (%u).\n", dst_pitch * height);
6106             if (context)
6107                 context_release(context);
6108             return E_OUTOFMEMORY;
6109         }
6110         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6111     }
6112     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6113     {
6114         /* This code is only entered for color keying fixups */
6115         UINT height = surface->resource.height;
6116
6117         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6118         dst_pitch = width * format.conv_byte_count;
6119         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6120
6121         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6122         {
6123             ERR("Out of memory (%u).\n", dst_pitch * height);
6124             if (context)
6125                 context_release(context);
6126             return E_OUTOFMEMORY;
6127         }
6128         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6129                 width, height, dst_pitch, convert, surface);
6130     }
6131     else
6132     {
6133         mem = surface->resource.allocatedMemory;
6134     }
6135
6136     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6137     data.addr = mem;
6138     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6139
6140     if (context)
6141         context_release(context);
6142
6143     /* Don't delete PBO memory. */
6144     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6145         HeapFree(GetProcessHeap(), 0, mem);
6146
6147     return WINED3D_OK;
6148 }
6149
6150 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6151 {
6152     struct wined3d_device *device = surface->resource.device;
6153     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6154     BOOL in_fbo = FALSE;
6155     HRESULT hr;
6156
6157     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6158
6159     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6160     {
6161         if (location == SFLAG_INTEXTURE)
6162         {
6163             struct wined3d_context *context = context_acquire(device, NULL);
6164             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6165             context_release(context);
6166             return WINED3D_OK;
6167         }
6168         else
6169         {
6170             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6171             return WINED3DERR_INVALIDCALL;
6172         }
6173     }
6174
6175     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6176     {
6177         if (surface_is_offscreen(surface))
6178         {
6179             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same
6180              * for offscreen targets. Prefer SFLAG_INTEXTURE. */
6181             if (location == SFLAG_INDRAWABLE)
6182                 location = SFLAG_INTEXTURE;
6183             in_fbo = TRUE;
6184         }
6185         else
6186         {
6187             TRACE("Surface %p is an onscreen surface.\n", surface);
6188         }
6189     }
6190
6191     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6192         location = SFLAG_INTEXTURE;
6193
6194     if (surface->flags & location)
6195     {
6196         TRACE("Location already up to date.\n");
6197         return WINED3D_OK;
6198     }
6199
6200     if (WARN_ON(d3d_surface))
6201     {
6202         DWORD required_access = resource_access_from_location(location);
6203         if ((surface->resource.access_flags & required_access) != required_access)
6204             WARN("Operation requires %#x access, but surface only has %#x.\n",
6205                     required_access, surface->resource.access_flags);
6206     }
6207
6208     if (!(surface->flags & SFLAG_LOCATIONS))
6209     {
6210         ERR("Surface %p does not have any up to date location.\n", surface);
6211         surface->flags |= SFLAG_LOST;
6212         return WINED3DERR_DEVICELOST;
6213     }
6214
6215     switch (location)
6216     {
6217         case SFLAG_INSYSMEM:
6218             surface_load_sysmem(surface, gl_info, rect);
6219             break;
6220
6221         case SFLAG_INDRAWABLE:
6222             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6223                 return hr;
6224             break;
6225
6226         case SFLAG_INTEXTURE:
6227         case SFLAG_INSRGBTEX:
6228             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6229                 return hr;
6230             break;
6231
6232         default:
6233             ERR("Don't know how to handle location %#x.\n", location);
6234             break;
6235     }
6236
6237     if (!rect)
6238     {
6239         surface->flags |= location;
6240
6241         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6242             surface_evict_sysmem(surface);
6243     }
6244
6245     if (in_fbo && (surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)))
6246     {
6247         /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
6248         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
6249     }
6250
6251     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6252             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6253     {
6254         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6255     }
6256
6257     return WINED3D_OK;
6258 }
6259
6260 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6261 {
6262     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6263
6264     /* Not on a swapchain - must be offscreen */
6265     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6266
6267     /* The front buffer is always onscreen */
6268     if (surface == swapchain->front_buffer) return FALSE;
6269
6270     /* If the swapchain is rendered to an FBO, the backbuffer is
6271      * offscreen, otherwise onscreen */
6272     return swapchain->render_to_fbo;
6273 }
6274
6275 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Nothing to release, since ffp_blit_alloc() doesn't allocate anything.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6278
6279 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6280 /* Context activation is done by the caller. */
6281 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6282 {
6283     BYTE table[256][4];
6284     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6285
6286     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6287
6288     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6289     ENTER_GL();
6290     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6291     LEAVE_GL();
6292 }
6293
6294 /* Context activation is done by the caller. */
6295 static HRESULT ffp_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6296 {
6297     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6298
6299     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6300      * else the surface is converted in software at upload time in LoadLocation.
6301      */
6302     if(fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6303         ffp_blit_p8_upload_palette(surface, gl_info);
6304
6305     ENTER_GL();
6306     glEnable(surface->texture_target);
6307     checkGLcall("glEnable(surface->texture_target)");
6308     LEAVE_GL();
6309     return WINED3D_OK;
6310 }
6311
6312 /* Context activation is done by the caller. */
6313 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6314 {
6315     ENTER_GL();
6316     glDisable(GL_TEXTURE_2D);
6317     checkGLcall("glDisable(GL_TEXTURE_2D)");
6318     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6319     {
6320         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6321         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6322     }
6323     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6324     {
6325         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6326         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6327     }
6328     LEAVE_GL();
6329 }
6330
6331 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6332         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6333         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6334 {
6335     enum complex_fixup src_fixup;
6336
6337     switch (blit_op)
6338     {
6339         case WINED3D_BLIT_OP_COLOR_BLIT:
6340             src_fixup = get_complex_fixup(src_format->color_fixup);
6341             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6342             {
6343                 TRACE("Checking support for fixup:\n");
6344                 dump_color_fixup_desc(src_format->color_fixup);
6345             }
6346
6347             if (!is_identity_fixup(dst_format->color_fixup))
6348             {
6349                 TRACE("Destination fixups are not supported\n");
6350                 return FALSE;
6351             }
6352
6353             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6354             {
6355                 TRACE("P8 fixup supported\n");
6356                 return TRUE;
6357             }
6358
6359             /* We only support identity conversions. */
6360             if (is_identity_fixup(src_format->color_fixup))
6361             {
6362                 TRACE("[OK]\n");
6363                 return TRUE;
6364             }
6365
6366             TRACE("[FAILED]\n");
6367             return FALSE;
6368
6369         case WINED3D_BLIT_OP_COLOR_FILL:
6370             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6371             {
6372                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6373                     return FALSE;
6374             }
6375             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6376             {
6377                 TRACE("Color fill not supported\n");
6378                 return FALSE;
6379             }
6380
6381             /* FIXME: We should reject color fills on formats with fixups,
6382              * but this would break P8 color fills for example. */
6383
6384             return TRUE;
6385
6386         case WINED3D_BLIT_OP_DEPTH_FILL:
6387             return TRUE;
6388
6389         default:
6390             TRACE("Unsupported blit_op=%d\n", blit_op);
6391             return FALSE;
6392     }
6393 }
6394
6395 /* Do not call while under the GL lock. */
6396 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6397         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6398 {
6399     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6400     struct wined3d_fb_state fb = {&dst_surface, NULL};
6401
6402     return device_clear_render_targets(device, 1, &fb,
6403             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6404 }
6405
6406 /* Do not call while under the GL lock. */
6407 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6408         struct wined3d_surface *surface, const RECT *rect, float depth)
6409 {
6410     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6411     struct wined3d_fb_state fb = {NULL, surface};
6412
6413     return device_clear_render_targets(device, 0, &fb,
6414             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6415 }
6416
6417 const struct blit_shader ffp_blit =  {
6418     ffp_blit_alloc,
6419     ffp_blit_free,
6420     ffp_blit_set,
6421     ffp_blit_unset,
6422     ffp_blit_supported,
6423     ffp_blit_color_fill,
6424     ffp_blit_depth_fill,
6425 };
6426
6427 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6428 {
6429     return WINED3D_OK;
6430 }
6431
/* Nothing to release for the CPU blitter.
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device) { }
6436
6437 /* Context activation is done by the caller. */
6438 static HRESULT cpu_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6439 {
6440     return WINED3D_OK;
6441 }
6442
/* Counterpart of cpu_blit_set(); there is no state to restore.
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info) { }
6447
6448 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6449         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6450         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6451 {
6452     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6453     {
6454         return TRUE;
6455     }
6456
6457     return FALSE;
6458 }
6459
6460 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6461         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6462         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6463 {
6464     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6465     const struct wined3d_format *src_format, *dst_format;
6466     struct wined3d_surface *orig_src = src_surface;
6467     WINED3DLOCKED_RECT dlock, slock;
6468     HRESULT hr = WINED3D_OK;
6469     const BYTE *sbuf;
6470     RECT xdst,xsrc;
6471     BYTE *dbuf;
6472     int x, y;
6473
6474     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6475             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6476             flags, fx, debug_d3dtexturefiltertype(filter));
6477
6478     xsrc = *src_rect;
6479
6480     if (!src_surface)
6481     {
6482         RECT full_rect;
6483
6484         full_rect.left = 0;
6485         full_rect.top = 0;
6486         full_rect.right = dst_surface->resource.width;
6487         full_rect.bottom = dst_surface->resource.height;
6488         IntersectRect(&xdst, &full_rect, dst_rect);
6489     }
6490     else
6491     {
6492         BOOL clip_horiz, clip_vert;
6493
6494         xdst = *dst_rect;
6495         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6496         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6497
6498         if (clip_vert || clip_horiz)
6499         {
6500             /* Now check if this is a special case or not... */
6501             if ((flags & WINEDDBLT_DDFX)
6502                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6503                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6504             {
6505                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6506                 return WINED3D_OK;
6507             }
6508
6509             if (clip_horiz)
6510             {
6511                 if (xdst.left < 0)
6512                 {
6513                     xsrc.left -= xdst.left;
6514                     xdst.left = 0;
6515                 }
6516                 if (xdst.right > dst_surface->resource.width)
6517                 {
6518                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6519                     xdst.right = (int)dst_surface->resource.width;
6520                 }
6521             }
6522
6523             if (clip_vert)
6524             {
6525                 if (xdst.top < 0)
6526                 {
6527                     xsrc.top -= xdst.top;
6528                     xdst.top = 0;
6529                 }
6530                 if (xdst.bottom > dst_surface->resource.height)
6531                 {
6532                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6533                     xdst.bottom = (int)dst_surface->resource.height;
6534                 }
6535             }
6536
6537             /* And check if after clipping something is still to be done... */
6538             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6539                     || (xdst.left >= (int)dst_surface->resource.width)
6540                     || (xdst.top >= (int)dst_surface->resource.height)
6541                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6542                     || (xsrc.left >= (int)src_surface->resource.width)
6543                     || (xsrc.top >= (int)src_surface->resource.height))
6544             {
6545                 TRACE("Nothing to be done after clipping.\n");
6546                 return WINED3D_OK;
6547             }
6548         }
6549     }
6550
6551     if (src_surface == dst_surface)
6552     {
6553         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6554         slock = dlock;
6555         src_format = dst_surface->resource.format;
6556         dst_format = src_format;
6557     }
6558     else
6559     {
6560         dst_format = dst_surface->resource.format;
6561         if (src_surface)
6562         {
6563             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6564             {
6565                 src_surface = surface_convert_format(src_surface, dst_format->id);
6566                 if (!src_surface)
6567                 {
6568                     /* The conv function writes a FIXME */
6569                     WARN("Cannot convert source surface format to dest format.\n");
6570                     goto release;
6571                 }
6572             }
6573             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6574             src_format = src_surface->resource.format;
6575         }
6576         else
6577         {
6578             src_format = dst_format;
6579         }
6580         if (dst_rect)
6581             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6582         else
6583             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6584     }
6585
6586     bpp = dst_surface->resource.format->byte_count;
6587     srcheight = xsrc.bottom - xsrc.top;
6588     srcwidth = xsrc.right - xsrc.left;
6589     dstheight = xdst.bottom - xdst.top;
6590     dstwidth = xdst.right - xdst.left;
6591     width = (xdst.right - xdst.left) * bpp;
6592
6593     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6594     {
6595         UINT row_block_count;
6596
6597         if (flags || src_surface == dst_surface)
6598         {
6599             FIXME("Only plain blits supported on compressed surfaces.\n");
6600             hr = E_NOTIMPL;
6601             goto release;
6602         }
6603
6604         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6605
6606         if (srcheight != dstheight || srcwidth != dstwidth)
6607         {
6608             WARN("Stretching not supported on compressed surfaces.\n");
6609             hr = WINED3DERR_INVALIDCALL;
6610             goto release;
6611         }
6612
6613         dbuf = dlock.pBits;
6614         sbuf = slock.pBits;
6615
6616         row_block_count = (dstwidth + dst_format->block_width - 1) / dst_format->block_width;
6617         for (y = 0; y < dstheight; y += dst_format->block_height)
6618         {
6619             memcpy(dbuf, sbuf, row_block_count * dst_format->block_byte_count);
6620             dbuf += dlock.Pitch;
6621             sbuf += slock.Pitch;
6622         }
6623
6624         goto release;
6625     }
6626
6627     if (dst_rect && src_surface != dst_surface)
6628         dbuf = dlock.pBits;
6629     else
6630         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6631
6632     /* First, all the 'source-less' blits */
6633     if (flags & WINEDDBLT_COLORFILL)
6634     {
6635         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6636         flags &= ~WINEDDBLT_COLORFILL;
6637     }
6638
6639     if (flags & WINEDDBLT_DEPTHFILL)
6640     {
6641         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6642     }
6643     if (flags & WINEDDBLT_ROP)
6644     {
6645         /* Catch some degenerate cases here. */
6646         switch (fx->dwROP)
6647         {
6648             case BLACKNESS:
6649                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6650                 break;
6651             case 0xAA0029: /* No-op */
6652                 break;
6653             case WHITENESS:
6654                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6655                 break;
6656             case SRCCOPY: /* Well, we do that below? */
6657                 break;
6658             default:
6659                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6660                 goto error;
6661         }
6662         flags &= ~WINEDDBLT_ROP;
6663     }
6664     if (flags & WINEDDBLT_DDROPS)
6665     {
6666         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6667     }
6668     /* Now the 'with source' blits. */
6669     if (src_surface)
6670     {
6671         const BYTE *sbase;
6672         int sx, xinc, sy, yinc;
6673
6674         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6675             goto release;
6676
6677         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6678                 && (srcwidth != dstwidth || srcheight != dstheight))
6679         {
6680             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6681             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6682         }
6683
6684         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6685         xinc = (srcwidth << 16) / dstwidth;
6686         yinc = (srcheight << 16) / dstheight;
6687
6688         if (!flags)
6689         {
6690             /* No effects, we can cheat here. */
6691             if (dstwidth == srcwidth)
6692             {
6693                 if (dstheight == srcheight)
6694                 {
6695                     /* No stretching in either direction. This needs to be as
6696                      * fast as possible. */
6697                     sbuf = sbase;
6698
6699                     /* Check for overlapping surfaces. */
6700                     if (src_surface != dst_surface || xdst.top < xsrc.top
6701                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6702                     {
6703                         /* No overlap, or dst above src, so copy from top downwards. */
6704                         for (y = 0; y < dstheight; ++y)
6705                         {
6706                             memcpy(dbuf, sbuf, width);
6707                             sbuf += slock.Pitch;
6708                             dbuf += dlock.Pitch;
6709                         }
6710                     }
6711                     else if (xdst.top > xsrc.top)
6712                     {
6713                         /* Copy from bottom upwards. */
6714                         sbuf += (slock.Pitch*dstheight);
6715                         dbuf += (dlock.Pitch*dstheight);
6716                         for (y = 0; y < dstheight; ++y)
6717                         {
6718                             sbuf -= slock.Pitch;
6719                             dbuf -= dlock.Pitch;
6720                             memcpy(dbuf, sbuf, width);
6721                         }
6722                     }
6723                     else
6724                     {
6725                         /* Src and dst overlapping on the same line, use memmove. */
6726                         for (y = 0; y < dstheight; ++y)
6727                         {
6728                             memmove(dbuf, sbuf, width);
6729                             sbuf += slock.Pitch;
6730                             dbuf += dlock.Pitch;
6731                         }
6732                     }
6733                 }
6734                 else
6735                 {
6736                     /* Stretching in y direction only. */
6737                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6738                     {
6739                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6740                         memcpy(dbuf, sbuf, width);
6741                         dbuf += dlock.Pitch;
6742                     }
6743                 }
6744             }
6745             else
6746             {
6747                 /* Stretching in X direction. */
6748                 int last_sy = -1;
6749                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6750                 {
6751                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6752
6753                     if ((sy >> 16) == (last_sy >> 16))
6754                     {
6755                         /* This source row is the same as last source row -
6756                          * Copy the already stretched row. */
6757                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6758                     }
6759                     else
6760                     {
6761 #define STRETCH_ROW(type) \
6762 do { \
6763     const type *s = (const type *)sbuf; \
6764     type *d = (type *)dbuf; \
6765     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6766         d[x] = s[sx >> 16]; \
6767 } while(0)
6768
6769                         switch(bpp)
6770                         {
6771                             case 1:
6772                                 STRETCH_ROW(BYTE);
6773                                 break;
6774                             case 2:
6775                                 STRETCH_ROW(WORD);
6776                                 break;
6777                             case 4:
6778                                 STRETCH_ROW(DWORD);
6779                                 break;
6780                             case 3:
6781                             {
6782                                 const BYTE *s;
6783                                 BYTE *d = dbuf;
6784                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6785                                 {
6786                                     DWORD pixel;
6787
6788                                     s = sbuf + 3 * (sx >> 16);
6789                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6790                                     d[0] = (pixel      ) & 0xff;
6791                                     d[1] = (pixel >>  8) & 0xff;
6792                                     d[2] = (pixel >> 16) & 0xff;
6793                                     d += 3;
6794                                 }
6795                                 break;
6796                             }
6797                             default:
6798                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6799                                 hr = WINED3DERR_NOTAVAILABLE;
6800                                 goto error;
6801                         }
6802 #undef STRETCH_ROW
6803                     }
6804                     dbuf += dlock.Pitch;
6805                     last_sy = sy;
6806                 }
6807             }
6808         }
6809         else
6810         {
6811             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6812             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6813             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6814             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6815             {
6816                 /* The color keying flags are checked for correctness in ddraw */
6817                 if (flags & WINEDDBLT_KEYSRC)
6818                 {
6819                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6820                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6821                 }
6822                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6823                 {
6824                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6825                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6826                 }
6827
6828                 if (flags & WINEDDBLT_KEYDEST)
6829                 {
6830                     /* Destination color keys are taken from the source surface! */
6831                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6832                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6833                 }
6834                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6835                 {
6836                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6837                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6838                 }
6839
6840                 if (bpp == 1)
6841                 {
6842                     keymask = 0xff;
6843                 }
6844                 else
6845                 {
6846                     keymask = src_format->red_mask
6847                             | src_format->green_mask
6848                             | src_format->blue_mask;
6849                 }
6850                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6851             }
6852
6853             if (flags & WINEDDBLT_DDFX)
6854             {
6855                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6856                 LONG tmpxy;
6857                 dTopLeft     = dbuf;
6858                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6859                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6860                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6861
6862                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6863                 {
6864                     /* I don't think we need to do anything about this flag */
6865                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6866                 }
6867                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6868                 {
6869                     tmp          = dTopRight;
6870                     dTopRight    = dTopLeft;
6871                     dTopLeft     = tmp;
6872                     tmp          = dBottomRight;
6873                     dBottomRight = dBottomLeft;
6874                     dBottomLeft  = tmp;
6875                     dstxinc = dstxinc * -1;
6876                 }
6877                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6878                 {
6879                     tmp          = dTopLeft;
6880                     dTopLeft     = dBottomLeft;
6881                     dBottomLeft  = tmp;
6882                     tmp          = dTopRight;
6883                     dTopRight    = dBottomRight;
6884                     dBottomRight = tmp;
6885                     dstyinc = dstyinc * -1;
6886                 }
6887                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6888                 {
6889                     /* I don't think we need to do anything about this flag */
6890                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6891                 }
6892                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6893                 {
6894                     tmp          = dBottomRight;
6895                     dBottomRight = dTopLeft;
6896                     dTopLeft     = tmp;
6897                     tmp          = dBottomLeft;
6898                     dBottomLeft  = dTopRight;
6899                     dTopRight    = tmp;
6900                     dstxinc = dstxinc * -1;
6901                     dstyinc = dstyinc * -1;
6902                 }
6903                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6904                 {
6905                     tmp          = dTopLeft;
6906                     dTopLeft     = dBottomLeft;
6907                     dBottomLeft  = dBottomRight;
6908                     dBottomRight = dTopRight;
6909                     dTopRight    = tmp;
6910                     tmpxy   = dstxinc;
6911                     dstxinc = dstyinc;
6912                     dstyinc = tmpxy;
6913                     dstxinc = dstxinc * -1;
6914                 }
6915                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6916                 {
6917                     tmp          = dTopLeft;
6918                     dTopLeft     = dTopRight;
6919                     dTopRight    = dBottomRight;
6920                     dBottomRight = dBottomLeft;
6921                     dBottomLeft  = tmp;
6922                     tmpxy   = dstxinc;
6923                     dstxinc = dstyinc;
6924                     dstyinc = tmpxy;
6925                     dstyinc = dstyinc * -1;
6926                 }
6927                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6928                 {
6929                     /* I don't think we need to do anything about this flag */
6930                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6931                 }
6932                 dbuf = dTopLeft;
6933                 flags &= ~(WINEDDBLT_DDFX);
6934             }
6935
6936 #define COPY_COLORKEY_FX(type) \
6937 do { \
6938     const type *s; \
6939     type *d = (type *)dbuf, *dx, tmp; \
6940     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6941     { \
6942         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6943         dx = d; \
6944         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6945         { \
6946             tmp = s[sx >> 16]; \
6947             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6948                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6949             { \
6950                 dx[0] = tmp; \
6951             } \
6952             dx = (type *)(((BYTE *)dx) + dstxinc); \
6953         } \
6954         d = (type *)(((BYTE *)d) + dstyinc); \
6955     } \
6956 } while(0)
6957
6958             switch (bpp)
6959             {
6960                 case 1:
6961                     COPY_COLORKEY_FX(BYTE);
6962                     break;
6963                 case 2:
6964                     COPY_COLORKEY_FX(WORD);
6965                     break;
6966                 case 4:
6967                     COPY_COLORKEY_FX(DWORD);
6968                     break;
6969                 case 3:
6970                 {
6971                     const BYTE *s;
6972                     BYTE *d = dbuf, *dx;
6973                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6974                     {
6975                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6976                         dx = d;
6977                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
6978                         {
6979                             DWORD pixel, dpixel = 0;
6980                             s = sbuf + 3 * (sx>>16);
6981                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6982                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
6983                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
6984                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
6985                             {
6986                                 dx[0] = (pixel      ) & 0xff;
6987                                 dx[1] = (pixel >>  8) & 0xff;
6988                                 dx[2] = (pixel >> 16) & 0xff;
6989                             }
6990                             dx += dstxinc;
6991                         }
6992                         d += dstyinc;
6993                     }
6994                     break;
6995                 }
6996                 default:
6997                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
6998                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
6999                     hr = WINED3DERR_NOTAVAILABLE;
7000                     goto error;
7001 #undef COPY_COLORKEY_FX
7002             }
7003         }
7004     }
7005
7006 error:
7007     if (flags && FIXME_ON(d3d_surface))
7008     {
7009         FIXME("\tUnsupported flags: %#x.\n", flags);
7010     }
7011
7012 release:
7013     wined3d_surface_unmap(dst_surface);
7014     if (src_surface && src_surface != dst_surface)
7015         wined3d_surface_unmap(src_surface);
7016     /* Release the converted surface, if any. */
7017     if (src_surface && src_surface != orig_src)
7018         wined3d_surface_decref(src_surface);
7019
7020     return hr;
7021 }
7022
7023 /* Do not call while under the GL lock. */
7024 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7025         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7026 {
7027     WINEDDBLTFX BltFx;
7028
7029     memset(&BltFx, 0, sizeof(BltFx));
7030     BltFx.dwSize = sizeof(BltFx);
7031     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7032     return wined3d_surface_blt(dst_surface, dst_rect, NULL, NULL,
7033             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7034 }
7035
7036 /* Do not call while under the GL lock. */
7037 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7038         struct wined3d_surface *surface, const RECT *rect, float depth)
7039 {
7040     FIXME("Depth filling not implemented by cpu_blit.\n");
7041     return WINED3DERR_INVALIDCALL;
7042 }
7043
7044 const struct blit_shader cpu_blit =  {
7045     cpu_blit_alloc,
7046     cpu_blit_free,
7047     cpu_blit_set,
7048     cpu_blit_unset,
7049     cpu_blit_supported,
7050     cpu_blit_color_fill,
7051     cpu_blit_depth_fill,
7052 };
7053
7054 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7055         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7056         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7057         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7058 {
7059     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7060     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7061     unsigned int resource_size;
7062     HRESULT hr;
7063
7064     if (multisample_quality > 0)
7065     {
7066         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7067         multisample_quality = 0;
7068     }
7069
7070     /* Quick lockable sanity check.
7071      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7072      * this function is too deep to need to care about things like this.
7073      * Levels need to be checked too, since they all affect what can be done. */
7074     switch (pool)
7075     {
7076         case WINED3DPOOL_SCRATCH:
7077             if (!lockable)
7078             {
7079                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7080                         "which are mutually exclusive, setting lockable to TRUE.\n");
7081                 lockable = TRUE;
7082             }
7083             break;
7084
7085         case WINED3DPOOL_SYSTEMMEM:
7086             if (!lockable)
7087                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7088             break;
7089
7090         case WINED3DPOOL_MANAGED:
7091             if (usage & WINED3DUSAGE_DYNAMIC)
7092                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7093             break;
7094
7095         case WINED3DPOOL_DEFAULT:
7096             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7097                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7098             break;
7099
7100         default:
7101             FIXME("Unknown pool %#x.\n", pool);
7102             break;
7103     };
7104
7105     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7106         FIXME("Trying to create a render target that isn't in the default pool.\n");
7107
7108     /* FIXME: Check that the format is supported by the device. */
7109
7110     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7111     if (!resource_size)
7112         return WINED3DERR_INVALIDCALL;
7113
7114     surface->surface_type = surface_type;
7115
7116     switch (surface_type)
7117     {
7118         case SURFACE_OPENGL:
7119             surface->surface_ops = &surface_ops;
7120             break;
7121
7122         case SURFACE_GDI:
7123             surface->surface_ops = &gdi_surface_ops;
7124             break;
7125
7126         default:
7127             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7128             return WINED3DERR_INVALIDCALL;
7129     }
7130
7131     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7132             multisample_type, multisample_quality, usage, pool, width, height, 1,
7133             resource_size, parent, parent_ops, &surface_resource_ops);
7134     if (FAILED(hr))
7135     {
7136         WARN("Failed to initialize resource, returning %#x.\n", hr);
7137         return hr;
7138     }
7139
7140     /* "Standalone" surface. */
7141     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7142
7143     surface->texture_level = level;
7144     list_init(&surface->overlays);
7145
7146     /* Flags */
7147     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7148     if (discard)
7149         surface->flags |= SFLAG_DISCARD;
7150     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7151         surface->flags |= SFLAG_LOCKABLE;
7152     /* I'm not sure if this qualifies as a hack or as an optimization. It
7153      * seems reasonable to assume that lockable render targets will get
7154      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7155      * creation. However, the other reason we want to do this is that several
7156      * ddraw applications access surface memory while the surface isn't
7157      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7158      * future locks prevents these from crashing. */
7159     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7160         surface->flags |= SFLAG_DYNLOCK;
7161
7162     /* Mark the texture as dirty so that it gets loaded first time around. */
7163     surface_add_dirty_rect(surface, NULL);
7164     list_init(&surface->renderbuffers);
7165
7166     TRACE("surface %p, memory %p, size %u\n",
7167             surface, surface->resource.allocatedMemory, surface->resource.size);
7168
7169     /* Call the private setup routine */
7170     hr = surface->surface_ops->surface_private_setup(surface);
7171     if (FAILED(hr))
7172     {
7173         ERR("Private setup failed, returning %#x\n", hr);
7174         surface->surface_ops->surface_cleanup(surface);
7175         return hr;
7176     }
7177
7178     return hr;
7179 }
7180
7181 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7182         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7183         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7184         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7185 {
7186     struct wined3d_surface *object;
7187     HRESULT hr;
7188
7189     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7190             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7191     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7192             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7193     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7194
7195     if (surface_type == SURFACE_OPENGL && !device->adapter)
7196     {
7197         ERR("OpenGL surfaces are not available without OpenGL.\n");
7198         return WINED3DERR_NOTAVAILABLE;
7199     }
7200
7201     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7202     if (!object)
7203     {
7204         ERR("Failed to allocate surface memory.\n");
7205         return WINED3DERR_OUTOFVIDEOMEMORY;
7206     }
7207
7208     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7209             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7210     if (FAILED(hr))
7211     {
7212         WARN("Failed to initialize surface, returning %#x.\n", hr);
7213         HeapFree(GetProcessHeap(), 0, object);
7214         return hr;
7215     }
7216
7217     TRACE("Created surface %p.\n", object);
7218     *surface = object;
7219
7220     return hr;
7221 }