wined3d: Rename IWineD3DDeviceImpl_MarkStateDirty() to device_invalidate_state().
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT surface_cpu_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans);
41 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
42         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
43         WINED3DTEXTUREFILTERTYPE filter);
44
45 static void surface_cleanup(struct wined3d_surface *surface)
46 {
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO) || !list_empty(&surface->renderbuffers))
50     {
51         struct wined3d_renderbuffer_entry *entry, *entry2;
52         const struct wined3d_gl_info *gl_info;
53         struct wined3d_context *context;
54
55         context = context_acquire(surface->resource.device, NULL);
56         gl_info = context->gl_info;
57
58         ENTER_GL();
59
60         if (surface->texture_name)
61         {
62             TRACE("Deleting texture %u.\n", surface->texture_name);
63             glDeleteTextures(1, &surface->texture_name);
64         }
65
66         if (surface->flags & SFLAG_PBO)
67         {
68             TRACE("Deleting PBO %u.\n", surface->pbo);
69             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
70         }
71
72         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
73         {
74             TRACE("Deleting renderbuffer %u.\n", entry->id);
75             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
76             HeapFree(GetProcessHeap(), 0, entry);
77         }
78
79         LEAVE_GL();
80
81         context_release(context);
82     }
83
84     if (surface->flags & SFLAG_DIBSECTION)
85     {
86         /* Release the DC. */
87         SelectObject(surface->hDC, surface->dib.holdbitmap);
88         DeleteDC(surface->hDC);
89         /* Release the DIB section. */
90         DeleteObject(surface->dib.DIBsection);
91         surface->dib.bitmap_data = NULL;
92         surface->resource.allocatedMemory = NULL;
93     }
94
95     if (surface->flags & SFLAG_USERPTR)
96         wined3d_surface_set_mem(surface, NULL);
97     if (surface->overlay_dest)
98         list_remove(&surface->overlay_entry);
99
100     HeapFree(GetProcessHeap(), 0, surface->palette9);
101
102     resource_cleanup(&surface->resource);
103 }
104
105 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
106 {
107     TRACE("surface %p, container %p.\n", surface, container);
108
109     if (!container && type != WINED3D_CONTAINER_NONE)
110         ERR("Setting NULL container of type %#x.\n", type);
111
112     if (type == WINED3D_CONTAINER_SWAPCHAIN)
113     {
114         surface->get_drawable_size = get_drawable_size_swapchain;
115     }
116     else
117     {
118         switch (wined3d_settings.offscreen_rendering_mode)
119         {
120             case ORM_FBO:
121                 surface->get_drawable_size = get_drawable_size_fbo;
122                 break;
123
124             case ORM_BACKBUFFER:
125                 surface->get_drawable_size = get_drawable_size_backbuffer;
126                 break;
127
128             default:
129                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
130                 return;
131         }
132     }
133
134     surface->container.type = type;
135     surface->container.u.base = container;
136 }
137
138 struct blt_info
139 {
140     GLenum binding;
141     GLenum bind_target;
142     enum tex_types tex_type;
143     GLfloat coords[4][3];
144 };
145
/* Rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
153
154 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
155 {
156     f->l = ((r->left * 2.0f) / w) - 1.0f;
157     f->t = ((r->top * 2.0f) / h) - 1.0f;
158     f->r = ((r->right * 2.0f) / w) - 1.0f;
159     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
160 }
161
162 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
163 {
164     GLfloat (*coords)[3] = info->coords;
165     struct float_rect f;
166
167     switch (target)
168     {
169         default:
170             FIXME("Unsupported texture target %#x\n", target);
171             /* Fall back to GL_TEXTURE_2D */
172         case GL_TEXTURE_2D:
173             info->binding = GL_TEXTURE_BINDING_2D;
174             info->bind_target = GL_TEXTURE_2D;
175             info->tex_type = tex_2d;
176             coords[0][0] = (float)rect->left / w;
177             coords[0][1] = (float)rect->top / h;
178             coords[0][2] = 0.0f;
179
180             coords[1][0] = (float)rect->right / w;
181             coords[1][1] = (float)rect->top / h;
182             coords[1][2] = 0.0f;
183
184             coords[2][0] = (float)rect->left / w;
185             coords[2][1] = (float)rect->bottom / h;
186             coords[2][2] = 0.0f;
187
188             coords[3][0] = (float)rect->right / w;
189             coords[3][1] = (float)rect->bottom / h;
190             coords[3][2] = 0.0f;
191             break;
192
193         case GL_TEXTURE_RECTANGLE_ARB:
194             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
195             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
196             info->tex_type = tex_rect;
197             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
198             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
199             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
200             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
201             break;
202
203         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
204             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
205             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
206             info->tex_type = tex_cube;
207             cube_coords_float(rect, w, h, &f);
208
209             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
210             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
211             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
212             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
213             break;
214
215         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
216             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
217             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
218             info->tex_type = tex_cube;
219             cube_coords_float(rect, w, h, &f);
220
221             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
222             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
223             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
224             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
225             break;
226
227         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
228             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
229             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
230             info->tex_type = tex_cube;
231             cube_coords_float(rect, w, h, &f);
232
233             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
234             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
235             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
236             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
237             break;
238
239         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
240             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
241             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
242             info->tex_type = tex_cube;
243             cube_coords_float(rect, w, h, &f);
244
245             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
246             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
247             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
248             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
249             break;
250
251         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
252             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
253             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
254             info->tex_type = tex_cube;
255             cube_coords_float(rect, w, h, &f);
256
257             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
258             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
259             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
260             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
261             break;
262
263         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
264             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
265             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
266             info->tex_type = tex_cube;
267             cube_coords_float(rect, w, h, &f);
268
269             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
270             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
271             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
272             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
273             break;
274     }
275 }
276
277 static inline void surface_get_rect(struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
278 {
279     if (rect_in)
280         *rect_out = *rect_in;
281     else
282     {
283         rect_out->left = 0;
284         rect_out->top = 0;
285         rect_out->right = surface->resource.width;
286         rect_out->bottom = surface->resource.height;
287     }
288 }
289
290 /* GL locking and context activation is done by the caller */
291 void draw_textured_quad(struct wined3d_surface *src_surface, const RECT *src_rect,
292         const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
293 {
294     struct blt_info info;
295
296     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
297
298     glEnable(info.bind_target);
299     checkGLcall("glEnable(bind_target)");
300
301     /* Bind the texture */
302     glBindTexture(info.bind_target, src_surface->texture_name);
303     checkGLcall("glBindTexture");
304
305     /* Filtering for StretchRect */
306     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
307             wined3d_gl_mag_filter(magLookup, Filter));
308     checkGLcall("glTexParameteri");
309     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
310             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
311     checkGLcall("glTexParameteri");
312     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
313     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
314     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
315     checkGLcall("glTexEnvi");
316
317     /* Draw a quad */
318     glBegin(GL_TRIANGLE_STRIP);
319     glTexCoord3fv(info.coords[0]);
320     glVertex2i(dst_rect->left, dst_rect->top);
321
322     glTexCoord3fv(info.coords[1]);
323     glVertex2i(dst_rect->right, dst_rect->top);
324
325     glTexCoord3fv(info.coords[2]);
326     glVertex2i(dst_rect->left, dst_rect->bottom);
327
328     glTexCoord3fv(info.coords[3]);
329     glVertex2i(dst_rect->right, dst_rect->bottom);
330     glEnd();
331
332     /* Unbind the texture */
333     glBindTexture(info.bind_target, 0);
334     checkGLcall("glBindTexture(info->bind_target, 0)");
335
336     /* We changed the filtering settings on the texture. Inform the
337      * container about this to get the filters reset properly next draw. */
338     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
339     {
340         struct wined3d_texture *texture = src_surface->container.u.texture;
341         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
342         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
343         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
344     }
345 }
346
347 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
348 {
349     const struct wined3d_format *format = surface->resource.format;
350     SYSTEM_INFO sysInfo;
351     BITMAPINFO *b_info;
352     int extraline = 0;
353     DWORD *masks;
354     UINT usage;
355     HDC dc;
356
357     TRACE("surface %p.\n", surface);
358
359     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
360     {
361         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
362         return WINED3DERR_INVALIDCALL;
363     }
364
365     switch (format->byte_count)
366     {
367         case 2:
368         case 4:
369             /* Allocate extra space to store the RGB bit masks. */
370             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
371             break;
372
373         case 3:
374             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
375             break;
376
377         default:
378             /* Allocate extra space for a palette. */
379             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
380                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
381             break;
382     }
383
384     if (!b_info)
385         return E_OUTOFMEMORY;
386
387     /* Some applications access the surface in via DWORDs, and do not take
388      * the necessary care at the end of the surface. So we need at least
389      * 4 extra bytes at the end of the surface. Check against the page size,
390      * if the last page used for the surface has at least 4 spare bytes we're
391      * safe, otherwise add an extra line to the DIB section. */
392     GetSystemInfo(&sysInfo);
393     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
394     {
395         extraline = 1;
396         TRACE("Adding an extra line to the DIB section.\n");
397     }
398
399     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
400     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
401     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
402     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
403     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
404             * wined3d_surface_get_pitch(surface);
405     b_info->bmiHeader.biPlanes = 1;
406     b_info->bmiHeader.biBitCount = format->byte_count * 8;
407
408     b_info->bmiHeader.biXPelsPerMeter = 0;
409     b_info->bmiHeader.biYPelsPerMeter = 0;
410     b_info->bmiHeader.biClrUsed = 0;
411     b_info->bmiHeader.biClrImportant = 0;
412
413     /* Get the bit masks */
414     masks = (DWORD *)b_info->bmiColors;
415     switch (surface->resource.format->id)
416     {
417         case WINED3DFMT_B8G8R8_UNORM:
418             usage = DIB_RGB_COLORS;
419             b_info->bmiHeader.biCompression = BI_RGB;
420             break;
421
422         case WINED3DFMT_B5G5R5X1_UNORM:
423         case WINED3DFMT_B5G5R5A1_UNORM:
424         case WINED3DFMT_B4G4R4A4_UNORM:
425         case WINED3DFMT_B4G4R4X4_UNORM:
426         case WINED3DFMT_B2G3R3_UNORM:
427         case WINED3DFMT_B2G3R3A8_UNORM:
428         case WINED3DFMT_R10G10B10A2_UNORM:
429         case WINED3DFMT_R8G8B8A8_UNORM:
430         case WINED3DFMT_R8G8B8X8_UNORM:
431         case WINED3DFMT_B10G10R10A2_UNORM:
432         case WINED3DFMT_B5G6R5_UNORM:
433         case WINED3DFMT_R16G16B16A16_UNORM:
434             usage = 0;
435             b_info->bmiHeader.biCompression = BI_BITFIELDS;
436             masks[0] = format->red_mask;
437             masks[1] = format->green_mask;
438             masks[2] = format->blue_mask;
439             break;
440
441         default:
442             /* Don't know palette */
443             b_info->bmiHeader.biCompression = BI_RGB;
444             usage = 0;
445             break;
446     }
447
448     if (!(dc = GetDC(0)))
449     {
450         HeapFree(GetProcessHeap(), 0, b_info);
451         return HRESULT_FROM_WIN32(GetLastError());
452     }
453
454     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
455             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
456             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
457     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
458     ReleaseDC(0, dc);
459
460     if (!surface->dib.DIBsection)
461     {
462         ERR("Failed to create DIB section.\n");
463         HeapFree(GetProcessHeap(), 0, b_info);
464         return HRESULT_FROM_WIN32(GetLastError());
465     }
466
467     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
468     /* Copy the existing surface to the dib section. */
469     if (surface->resource.allocatedMemory)
470     {
471         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
472                 surface->resource.height * wined3d_surface_get_pitch(surface));
473     }
474     else
475     {
476         /* This is to make maps read the GL texture although memory is allocated. */
477         surface->flags &= ~SFLAG_INSYSMEM;
478     }
479     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
480
481     HeapFree(GetProcessHeap(), 0, b_info);
482
483     /* Now allocate a DC. */
484     surface->hDC = CreateCompatibleDC(0);
485     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
486     TRACE("Using wined3d palette %p.\n", surface->palette);
487     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
488
489     surface->flags |= SFLAG_DIBSECTION;
490
491     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
492     surface->resource.heapMemory = NULL;
493
494     return WINED3D_OK;
495 }
496
497 static void surface_prepare_system_memory(struct wined3d_surface *surface)
498 {
499     struct wined3d_device *device = surface->resource.device;
500     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
501
502     TRACE("surface %p.\n", surface);
503
504     /* Performance optimization: Count how often a surface is locked, if it is
505      * locked regularly do not throw away the system memory copy. This avoids
506      * the need to download the surface from OpenGL all the time. The surface
507      * is still downloaded if the OpenGL texture is changed. */
508     if (!(surface->flags & SFLAG_DYNLOCK))
509     {
510         if (++surface->lockCount > MAXLOCKCOUNT)
511         {
512             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
513             surface->flags |= SFLAG_DYNLOCK;
514         }
515     }
516
517     /* Create a PBO for dynamically locked surfaces but don't do it for
518      * converted or NPOT surfaces. Also don't create a PBO for systemmem
519      * surfaces. */
520     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
521             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
522             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
523     {
524         struct wined3d_context *context;
525         GLenum error;
526
527         context = context_acquire(device, NULL);
528         ENTER_GL();
529
530         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
531         error = glGetError();
532         if (!surface->pbo || error != GL_NO_ERROR)
533             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
534
535         TRACE("Binding PBO %u.\n", surface->pbo);
536
537         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
538         checkGLcall("glBindBufferARB");
539
540         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
541                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
542         checkGLcall("glBufferDataARB");
543
544         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
545         checkGLcall("glBindBufferARB");
546
547         /* We don't need the system memory anymore and we can't even use it for PBOs. */
548         if (!(surface->flags & SFLAG_CLIENT))
549         {
550             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
551             surface->resource.heapMemory = NULL;
552         }
553         surface->resource.allocatedMemory = NULL;
554         surface->flags |= SFLAG_PBO;
555         LEAVE_GL();
556         context_release(context);
557     }
558     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
559     {
560         /* Whatever surface we have, make sure that there is memory allocated
561          * for the downloaded copy, or a PBO to map. */
562         if (!surface->resource.heapMemory)
563             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
564
565         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
566                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
567
568         if (surface->flags & SFLAG_INSYSMEM)
569             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
570     }
571 }
572
573 static void surface_evict_sysmem(struct wined3d_surface *surface)
574 {
575     if (surface->flags & SFLAG_DONOTFREE)
576         return;
577
578     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
579     surface->resource.allocatedMemory = NULL;
580     surface->resource.heapMemory = NULL;
581     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
582 }
583
584 /* Context activation is done by the caller. */
585 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
586         const struct wined3d_gl_info *gl_info, BOOL srgb)
587 {
588     struct wined3d_device *device = surface->resource.device;
589     DWORD active_sampler;
590     GLint active_texture;
591
592     /* We don't need a specific texture unit, but after binding the texture
593      * the current unit is dirty. Read the unit back instead of switching to
594      * 0, this avoids messing around with the state manager's GL states. The
595      * current texture unit should always be a valid one.
596      *
597      * To be more specific, this is tricky because we can implicitly be
598      * called from sampler() in state.c. This means we can't touch anything
599      * other than whatever happens to be the currently active texture, or we
600      * would risk marking already applied sampler states dirty again.
601      *
602      * TODO: Track the current active texture per GL context instead of using
603      * glGet(). */
604
605     ENTER_GL();
606     glGetIntegerv(GL_ACTIVE_TEXTURE, &active_texture);
607     LEAVE_GL();
608     active_sampler = device->rev_tex_unit_map[active_texture - GL_TEXTURE0_ARB];
609
610     if (active_sampler != WINED3D_UNMAPPED_STAGE)
611         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
612     surface_bind(surface, gl_info, srgb);
613 }
614
615 static void surface_force_reload(struct wined3d_surface *surface)
616 {
617     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
618 }
619
620 static void surface_release_client_storage(struct wined3d_surface *surface)
621 {
622     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
623
624     ENTER_GL();
625     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
626     if (surface->texture_name)
627     {
628         surface_bind_and_dirtify(surface, context->gl_info, FALSE);
629         glTexImage2D(surface->texture_target, surface->texture_level,
630                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
631     }
632     if (surface->texture_name_srgb)
633     {
634         surface_bind_and_dirtify(surface, context->gl_info, TRUE);
635         glTexImage2D(surface->texture_target, surface->texture_level,
636                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
637     }
638     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
639     LEAVE_GL();
640
641     context_release(context);
642
643     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
644     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
645     surface_force_reload(surface);
646 }
647
648 static HRESULT surface_private_setup(struct wined3d_surface *surface)
649 {
650     /* TODO: Check against the maximum texture sizes supported by the video card. */
651     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
652     unsigned int pow2Width, pow2Height;
653
654     TRACE("surface %p.\n", surface);
655
656     surface->texture_name = 0;
657     surface->texture_target = GL_TEXTURE_2D;
658
659     /* Non-power2 support */
660     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
661     {
662         pow2Width = surface->resource.width;
663         pow2Height = surface->resource.height;
664     }
665     else
666     {
667         /* Find the nearest pow2 match */
668         pow2Width = pow2Height = 1;
669         while (pow2Width < surface->resource.width)
670             pow2Width <<= 1;
671         while (pow2Height < surface->resource.height)
672             pow2Height <<= 1;
673     }
674     surface->pow2Width = pow2Width;
675     surface->pow2Height = pow2Height;
676
677     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
678     {
679         /* TODO: Add support for non power two compressed textures. */
680         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
681         {
682             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
683                   surface, surface->resource.width, surface->resource.height);
684             return WINED3DERR_NOTAVAILABLE;
685         }
686     }
687
688     if (pow2Width != surface->resource.width
689             || pow2Height != surface->resource.height)
690     {
691         surface->flags |= SFLAG_NONPOW2;
692     }
693
694     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
695             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
696     {
697         /* One of three options:
698          * 1: Do the same as we do with NPOT and scale the texture, (any
699          *    texture ops would require the texture to be scaled which is
700          *    potentially slow)
701          * 2: Set the texture to the maximum size (bad idea).
702          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
703          * 4: Create the surface, but allow it to be used only for DirectDraw
704          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
705          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
706          *    the render target. */
707         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
708         {
709             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
710             return WINED3DERR_NOTAVAILABLE;
711         }
712
713         /* We should never use this surface in combination with OpenGL! */
714         TRACE("Creating an oversized surface: %ux%u.\n",
715                 surface->pow2Width, surface->pow2Height);
716     }
717     else
718     {
719         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
720          * and EXT_PALETTED_TEXTURE is used in combination with texture
721          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
722          * EXT_PALETTED_TEXTURE doesn't work in combination with
723          * ARB_TEXTURE_RECTANGLE. */
724         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
725                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
726                 && gl_info->supported[EXT_PALETTED_TEXTURE]
727                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
728         {
729             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
730             surface->pow2Width = surface->resource.width;
731             surface->pow2Height = surface->resource.height;
732             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
733         }
734     }
735
736     switch (wined3d_settings.offscreen_rendering_mode)
737     {
738         case ORM_FBO:
739             surface->get_drawable_size = get_drawable_size_fbo;
740             break;
741
742         case ORM_BACKBUFFER:
743             surface->get_drawable_size = get_drawable_size_backbuffer;
744             break;
745
746         default:
747             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
748             return WINED3DERR_INVALIDCALL;
749     }
750
751     surface->flags |= SFLAG_INSYSMEM;
752
753     return WINED3D_OK;
754 }
755
756 static void surface_realize_palette(struct wined3d_surface *surface)
757 {
758     struct wined3d_palette *palette = surface->palette;
759
760     TRACE("surface %p.\n", surface);
761
762     if (!palette) return;
763
764     if (surface->resource.format->id == WINED3DFMT_P8_UINT
765             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
766     {
767         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
768         {
769             /* Make sure the texture is up to date. This call doesn't do
770              * anything if the texture is already up to date. */
771             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
772
773             /* We want to force a palette refresh, so mark the drawable as not being up to date */
774             if (!surface_is_offscreen(surface))
775                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
776         }
777         else
778         {
779             if (!(surface->flags & SFLAG_INSYSMEM))
780             {
781                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
782                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
783             }
784             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
785         }
786     }
787
788     if (surface->flags & SFLAG_DIBSECTION)
789     {
790         RGBQUAD col[256];
791         unsigned int i;
792
793         TRACE("Updating the DC's palette.\n");
794
795         for (i = 0; i < 256; ++i)
796         {
797             col[i].rgbRed   = palette->palents[i].peRed;
798             col[i].rgbGreen = palette->palents[i].peGreen;
799             col[i].rgbBlue  = palette->palents[i].peBlue;
800             col[i].rgbReserved = 0;
801         }
802         SetDIBColorTable(surface->hDC, 0, 256, col);
803     }
804
805     /* Propagate the changes to the drawable when we have a palette. */
806     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
807         surface_load_location(surface, SFLAG_INDRAWABLE, NULL);
808 }
809
810 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
811 {
812     HRESULT hr;
813
814     /* If there's no destination surface there is nothing to do. */
815     if (!surface->overlay_dest)
816         return WINED3D_OK;
817
818     /* Blt calls ModifyLocation on the dest surface, which in turn calls
819      * DrawOverlay to update the overlay. Prevent an endless recursion. */
820     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
821         return WINED3D_OK;
822
823     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
824     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
825             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
826     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
827
828     return hr;
829 }
830
831 static void surface_preload(struct wined3d_surface *surface)
832 {
833     TRACE("surface %p.\n", surface);
834
835     surface_internal_preload(surface, SRGB_ANY);
836 }
837
838 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
839 {
840     struct wined3d_device *device = surface->resource.device;
841     const RECT *pass_rect = rect;
842
843     TRACE("surface %p, rect %s, flags %#x.\n",
844             surface, wine_dbgstr_rect(rect), flags);
845
846     if (flags & WINED3DLOCK_DISCARD)
847     {
848         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
849         surface_prepare_system_memory(surface);
850         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
851     }
852     else
853     {
854         /* surface_load_location() does not check if the rectangle specifies
855          * the full surface. Most callers don't need that, so do it here. */
856         if (rect && !rect->top && !rect->left
857                 && rect->right == surface->resource.width
858                 && rect->bottom == surface->resource.height)
859             pass_rect = NULL;
860
861         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
862                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
863                 || surface == device->fb.render_targets[0])))
864             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
865     }
866
867     if (surface->flags & SFLAG_PBO)
868     {
869         const struct wined3d_gl_info *gl_info;
870         struct wined3d_context *context;
871
872         context = context_acquire(device, NULL);
873         gl_info = context->gl_info;
874
875         ENTER_GL();
876         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
877         checkGLcall("glBindBufferARB");
878
879         /* This shouldn't happen but could occur if some other function
880          * didn't handle the PBO properly. */
881         if (surface->resource.allocatedMemory)
882             ERR("The surface already has PBO memory allocated.\n");
883
884         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
885         checkGLcall("glMapBufferARB");
886
887         /* Make sure the PBO isn't set anymore in order not to break non-PBO
888          * calls. */
889         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
890         checkGLcall("glBindBufferARB");
891
892         LEAVE_GL();
893         context_release(context);
894     }
895
896     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
897     {
898         if (!rect)
899             surface_add_dirty_rect(surface, NULL);
900         else
901         {
902             WINED3DBOX b;
903
904             b.Left = rect->left;
905             b.Top = rect->top;
906             b.Right = rect->right;
907             b.Bottom = rect->bottom;
908             b.Front = 0;
909             b.Back = 1;
910             surface_add_dirty_rect(surface, &b);
911         }
912     }
913 }
914
/* Finish CPU access to a surface that was previously mapped.
 *
 * Clears the recorded locked rectangle, unmaps the PBO if the surface uses
 * one, and, for swapchain surfaces or the current render target, writes the
 * modified data back to the drawable. Overlays with a destination are
 * redrawn at the end in every case. */
static void surface_unmap(struct wined3d_surface *surface)
{
    struct wined3d_device *device = surface->resource.device;
    BOOL fullsurface;

    TRACE("surface %p.\n", surface);

    memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));

    if (surface->flags & SFLAG_PBO)
    {
        const struct wined3d_gl_info *gl_info;
        struct wined3d_context *context;

        TRACE("Freeing PBO memory.\n");

        context = context_acquire(device, NULL);
        gl_info = context->gl_info;

        ENTER_GL();
        GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
        GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
        GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
        checkGLcall("glUnmapBufferARB");
        LEAVE_GL();
        context_release(context);

        /* The pointer obtained from mapping the buffer is no longer valid. */
        surface->resource.allocatedMemory = NULL;
    }

    TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);

    /* If the drawable or texture location is still flagged as current,
     * there is nothing to write back. */
    if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
    {
        TRACE("Not dirtified, nothing to do.\n");
        goto done;
    }

    if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
            || (device->fb.render_targets && surface == device->fb.render_targets[0]))
    {
        if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
        {
            /* Only complain once per process. */
            static BOOL warned = FALSE;
            if (!warned)
            {
                ERR("The application tries to write to the render target, but render target locking is disabled.\n");
                warned = TRUE;
            }
            goto done;
        }

        if (!surface->dirtyRect.left && !surface->dirtyRect.top
                && surface->dirtyRect.right == surface->resource.width
                && surface->dirtyRect.bottom == surface->resource.height)
        {
            fullsurface = TRUE;
        }
        else
        {
            /* TODO: Proper partial rectangle tracking. */
            fullsurface = FALSE;
            surface->flags |= SFLAG_INSYSMEM;
        }

        surface_load_location(surface, SFLAG_INDRAWABLE, fullsurface ? NULL : &surface->dirtyRect);

        /* Partial rectangle tracking is not commonly implemented, it is only
         * done for render targets. INSYSMEM was set before to tell
         * surface_load_location() where to read the rectangle from.
         * Indrawable is set because all modifications from the partial
         * sysmem copy are written back to the drawable, thus the surface is
         * merged again in the drawable. The sysmem copy is not fully up to
         * date because only a subrectangle was read in Map(). */
        if (!fullsurface)
        {
            surface_modify_location(surface, SFLAG_INDRAWABLE, TRUE);
            surface_evict_sysmem(surface);
        }

        /* Reset the dirty rectangle to an empty (inverted) rectangle. */
        surface->dirtyRect.left = surface->resource.width;
        surface->dirtyRect.top = surface->resource.height;
        surface->dirtyRect.right = 0;
        surface->dirtyRect.bottom = 0;
    }
    else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
    {
        FIXME("Depth / stencil buffer locking is not implemented.\n");
    }

done:
    /* Overlays have to be redrawn manually after changes with the GL implementation */
    if (surface->overlay_dest)
        surface->surface_ops->surface_draw_overlay(surface);
}
1010
1011 static HRESULT surface_getdc(struct wined3d_surface *surface)
1012 {
1013     WINED3DLOCKED_RECT lock;
1014     HRESULT hr;
1015
1016     TRACE("surface %p.\n", surface);
1017
1018     /* Create a DIB section if there isn't a dc yet. */
1019     if (!surface->hDC)
1020     {
1021         if (surface->flags & SFLAG_CLIENT)
1022         {
1023             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1024             surface_release_client_storage(surface);
1025         }
1026         hr = surface_create_dib_section(surface);
1027         if (FAILED(hr))
1028             return WINED3DERR_INVALIDCALL;
1029
1030         /* Use the DIB section from now on if we are not using a PBO. */
1031         if (!(surface->flags & SFLAG_PBO))
1032             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1033     }
1034
1035     /* Map the surface. */
1036     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1037     if (FAILED(hr))
1038         ERR("Map failed, hr %#x.\n", hr);
1039
1040     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1041      * activates the allocatedMemory. */
1042     if (surface->flags & SFLAG_PBO)
1043         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->dib.bitmap_size);
1044
1045     return hr;
1046 }
1047
1048 static HRESULT surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1049 {
1050     TRACE("surface %p, override %p.\n", surface, override);
1051
1052     /* Flipping is only supported on render targets and overlays. */
1053     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
1054     {
1055         WARN("Tried to flip a non-render target, non-overlay surface.\n");
1056         return WINEDDERR_NOTFLIPPABLE;
1057     }
1058
1059     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1060     {
1061         flip_surface(surface, override);
1062
1063         /* Update the overlay if it is visible */
1064         if (surface->overlay_dest)
1065             return surface->surface_ops->surface_draw_overlay(surface);
1066         else
1067             return WINED3D_OK;
1068     }
1069
1070     return WINED3D_OK;
1071 }
1072
1073 static BOOL surface_is_full_rect(struct wined3d_surface *surface, const RECT *r)
1074 {
1075     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1076         return FALSE;
1077     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1078         return FALSE;
1079     return TRUE;
1080 }
1081
/* Blit depth and/or stencil data between two surfaces using
 * glBlitFramebuffer().
 *
 * Both surfaces must have the same combination of depth / stencil format
 * flags, and at least one of those flags must be set; otherwise an error is
 * logged and nothing is done. The GL state that is changed directly here is
 * invalidated on the device so that it gets reapplied later. */
static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
        const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
{
    const struct wined3d_gl_info *gl_info;
    struct wined3d_context *context;
    DWORD src_mask, dst_mask;
    GLbitfield gl_mask;

    TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
            device, src_surface, wine_dbgstr_rect(src_rect),
            dst_surface, wine_dbgstr_rect(dst_rect));

    src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
    dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);

    if (src_mask != dst_mask)
    {
        ERR("Incompatible formats %s and %s.\n",
                debug_d3dformat(src_surface->resource.format->id),
                debug_d3dformat(dst_surface->resource.format->id));
        return;
    }

    if (!src_mask)
    {
        ERR("Not a depth / stencil format: %s.\n",
                debug_d3dformat(src_surface->resource.format->id));
        return;
    }

    /* Translate the format flags into the GL buffer mask for the blit. */
    gl_mask = 0;
    if (src_mask & WINED3DFMT_FLAG_DEPTH)
        gl_mask |= GL_DEPTH_BUFFER_BIT;
    if (src_mask & WINED3DFMT_FLAG_STENCIL)
        gl_mask |= GL_STENCIL_BUFFER_BIT;

    /* Make sure the locations are up-to-date. Loading the destination
     * surface isn't required if the entire surface is overwritten. */
    surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
    if (!surface_is_full_rect(dst_surface, dst_rect))
        surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);

    context = context_acquire(device, NULL);
    if (!context->valid)
    {
        context_release(context);
        WARN("Invalid context, skipping blit.\n");
        return;
    }

    gl_info = context->gl_info;

    ENTER_GL();

    /* Attach the source to the read framebuffer, without a colour buffer. */
    context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
    glReadBuffer(GL_NONE);
    checkGLcall("glReadBuffer()");
    context_check_fbo_status(context, GL_READ_FRAMEBUFFER);

    /* Attach the destination to the draw framebuffer, likewise without a
     * colour buffer. */
    context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
    context_set_draw_buffer(context, GL_NONE);
    context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);

    /* Enable writes to the buffers being blitted; each directly changed GL
     * state is invalidated on the device. */
    if (gl_mask & GL_DEPTH_BUFFER_BIT)
    {
        glDepthMask(GL_TRUE);
        device_invalidate_state(device, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
    }
    if (gl_mask & GL_STENCIL_BUFFER_BIT)
    {
        if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
        {
            glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
            device_invalidate_state(device, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
        }
        glStencilMask(~0U);
        device_invalidate_state(device, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
    }

    glDisable(GL_SCISSOR_TEST);
    device_invalidate_state(device, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));

    gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
            dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
    checkGLcall("glBlitFramebuffer()");

    LEAVE_GL();

    if (wined3d_settings.strict_draw_ordering)
        wglFlush(); /* Flush to ensure ordering across contexts. */

    context_release(context);
}
1175
1176 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1177         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1178         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1179 {
1180     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1181         return FALSE;
1182
1183     /* Source and/or destination need to be on the GL side */
1184     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1185         return FALSE;
1186
1187     switch (blit_op)
1188     {
1189         case WINED3D_BLIT_OP_COLOR_BLIT:
1190             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1191                 return FALSE;
1192             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1193                 return FALSE;
1194             break;
1195
1196         case WINED3D_BLIT_OP_DEPTH_BLIT:
1197             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1198                 return FALSE;
1199             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1200                 return FALSE;
1201             break;
1202
1203         default:
1204             return FALSE;
1205     }
1206
1207     if (!(src_format->id == dst_format->id
1208             || (is_identity_fixup(src_format->color_fixup)
1209             && is_identity_fixup(dst_format->color_fixup))))
1210         return FALSE;
1211
1212     return TRUE;
1213 }
1214
1215 static BOOL surface_convert_depth_to_float(struct wined3d_surface *surface, DWORD depth, float *float_depth)
1216 {
1217     const struct wined3d_format *format = surface->resource.format;
1218
1219     switch (format->id)
1220     {
1221         case WINED3DFMT_S1_UINT_D15_UNORM:
1222             *float_depth = depth / (float)0x00007fff;
1223             break;
1224
1225         case WINED3DFMT_D16_UNORM:
1226             *float_depth = depth / (float)0x0000ffff;
1227             break;
1228
1229         case WINED3DFMT_D24_UNORM_S8_UINT:
1230         case WINED3DFMT_X8D24_UNORM:
1231             *float_depth = depth / (float)0x00ffffff;
1232             break;
1233
1234         case WINED3DFMT_D32_UNORM:
1235             *float_depth = depth / (float)0xffffffff;
1236             break;
1237
1238         default:
1239             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1240             return FALSE;
1241     }
1242
1243     return TRUE;
1244 }
1245
1246 /* Do not call while under the GL lock. */
1247 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1248 {
1249     const struct wined3d_resource *resource = &surface->resource;
1250     struct wined3d_device *device = resource->device;
1251     const struct blit_shader *blitter;
1252
1253     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1254             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1255     if (!blitter)
1256     {
1257         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1258         return WINED3DERR_INVALIDCALL;
1259     }
1260
1261     return blitter->depth_fill(device, surface, rect, depth);
1262 }
1263
1264 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1265         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1266 {
1267     struct wined3d_device *device = src_surface->resource.device;
1268
1269     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1270             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1271             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1272         return WINED3DERR_INVALIDCALL;
1273
1274     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1275
1276     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1277             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1278     surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
1279
1280     return WINED3D_OK;
1281 }
1282
1283 /* Do not call while under the GL lock. */
1284 static HRESULT surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1285         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1286         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1287 {
1288     struct wined3d_device *device = dst_surface->resource.device;
1289     DWORD src_ds_flags, dst_ds_flags;
1290
1291     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1292             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1293             flags, fx, debug_d3dtexturefiltertype(filter));
1294     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1295
1296     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1297     {
1298         WARN(" Surface is busy, returning DDERR_SURFACEBUSY\n");
1299         return WINEDDERR_SURFACEBUSY;
1300     }
1301
1302     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1303     if (src_surface)
1304         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1305     else
1306         src_ds_flags = 0;
1307
1308     if (src_ds_flags || dst_ds_flags)
1309     {
1310         if (flags & WINEDDBLT_DEPTHFILL)
1311         {
1312             float depth;
1313             RECT rect;
1314
1315             TRACE("Depth fill.\n");
1316
1317             surface_get_rect(dst_surface, dst_rect_in, &rect);
1318
1319             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1320                 return WINED3DERR_INVALIDCALL;
1321
1322             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &rect, depth)))
1323                 return WINED3D_OK;
1324         }
1325         else
1326         {
1327             RECT src_rect, dst_rect;
1328
1329             /* Accessing depth / stencil surfaces is supposed to fail while in
1330              * a scene, except for fills, which seem to work. */
1331             if (device->inScene)
1332             {
1333                 WARN("Rejecting depth / stencil access while in scene.\n");
1334                 return WINED3DERR_INVALIDCALL;
1335             }
1336
1337             if (src_ds_flags != dst_ds_flags)
1338             {
1339                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1340                 return WINED3DERR_INVALIDCALL;
1341             }
1342
1343             if (src_rect_in && (src_rect_in->top || src_rect_in->left
1344                     || src_rect_in->bottom != src_surface->resource.height
1345                     || src_rect_in->right != src_surface->resource.width))
1346             {
1347                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1348                         wine_dbgstr_rect(src_rect_in));
1349                 return WINED3DERR_INVALIDCALL;
1350             }
1351
1352             if (dst_rect_in && (dst_rect_in->top || dst_rect_in->left
1353                     || dst_rect_in->bottom != dst_surface->resource.height
1354                     || dst_rect_in->right != dst_surface->resource.width))
1355             {
1356                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1357                         wine_dbgstr_rect(src_rect_in));
1358                 return WINED3DERR_INVALIDCALL;
1359             }
1360
1361             if (src_surface->resource.height != dst_surface->resource.height
1362                     || src_surface->resource.width != dst_surface->resource.width)
1363             {
1364                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1365                 return WINED3DERR_INVALIDCALL;
1366             }
1367
1368             surface_get_rect(src_surface, src_rect_in, &src_rect);
1369             surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1370
1371             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1372                 return WINED3D_OK;
1373         }
1374     }
1375
1376     /* Special cases for render targets. */
1377     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1378             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1379     {
1380         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, dst_rect_in,
1381                 src_surface, src_rect_in, flags, fx, filter)))
1382             return WINED3D_OK;
1383     }
1384
1385     /* For the rest call the X11 surface implementation. For render targets
1386      * this should be implemented OpenGL accelerated in BltOverride, other
1387      * blits are rather rare. */
1388     return surface_cpu_blt(dst_surface, dst_rect_in, src_surface, src_rect_in, flags, fx, filter);
1389 }
1390
1391 /* Do not call while under the GL lock. */
1392 static HRESULT surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1393         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1394 {
1395     struct wined3d_device *device = dst_surface->resource.device;
1396
1397     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
1398             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1399
1400     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface->flags & SFLAG_LOCKED))
1401     {
1402         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1403         return WINEDDERR_SURFACEBUSY;
1404     }
1405
1406     if (device->inScene && (dst_surface == device->fb.depth_stencil || src_surface == device->fb.depth_stencil))
1407     {
1408         WARN("Attempt to access the depth / stencil surface while in a scene.\n");
1409         return WINED3DERR_INVALIDCALL;
1410     }
1411
1412     /* Special cases for RenderTargets */
1413     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1414             || (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
1415     {
1416
1417         RECT src_rect, dst_rect;
1418         DWORD flags = 0;
1419
1420         surface_get_rect(src_surface, src_rect_in, &src_rect);
1421
1422         dst_rect.left = dst_x;
1423         dst_rect.top = dst_y;
1424         dst_rect.right = dst_x + src_rect.right - src_rect.left;
1425         dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1426
1427         /* Convert BltFast flags into Blt ones because BltOverride is called
1428          * from Blt as well. */
1429         if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1430             flags |= WINEDDBLT_KEYSRC;
1431         if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1432             flags |= WINEDDBLT_KEYDEST;
1433         if (trans & WINEDDBLTFAST_WAIT)
1434             flags |= WINEDDBLT_WAIT;
1435         if (trans & WINEDDBLTFAST_DONOTWAIT)
1436             flags |= WINEDDBLT_DONOTWAIT;
1437
1438         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface,
1439                 &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT)))
1440             return WINED3D_OK;
1441     }
1442
1443     return surface_cpu_bltfast(dst_surface, dst_x, dst_y, src_surface, src_rect_in, trans);
1444 }
1445
/* Set or clear user-supplied memory for a surface.
 *
 * With a non-NULL mem that differs from the current allocatedMemory, the
 * surface switches to the user pointer: an existing DIB section is destroyed
 * or, for non-user-pointer surfaces, the heap memory is freed. With any
 * other mem, a surface currently using a user pointer goes back to memory
 * managed by wined3d. Always returns WINED3D_OK. */
static HRESULT surface_set_mem(struct wined3d_surface *surface, void *mem)
{
    TRACE("surface %p, mem %p.\n", surface, mem);

    if (mem && mem != surface->resource.allocatedMemory)
    {
        /* Old heap allocation to free once the switch is complete. */
        void *release = NULL;

        /* Do I have to copy the old surface content? */
        if (surface->flags & SFLAG_DIBSECTION)
        {
            SelectObject(surface->hDC, surface->dib.holdbitmap);
            DeleteDC(surface->hDC);
            /* Release the DIB section. */
            DeleteObject(surface->dib.DIBsection);
            surface->dib.bitmap_data = NULL;
            surface->resource.allocatedMemory = NULL;
            surface->hDC = NULL;
            surface->flags &= ~SFLAG_DIBSECTION;
        }
        else if (!(surface->flags & SFLAG_USERPTR))
        {
            release = surface->resource.heapMemory;
            surface->resource.heapMemory = NULL;
        }
        surface->resource.allocatedMemory = mem;
        surface->flags |= SFLAG_USERPTR;

        /* Now the surface memory is most up to date. Invalidate drawable and texture. */
        surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);

        /* For client textures OpenGL has to be notified. */
        if (surface->flags & SFLAG_CLIENT)
            surface_release_client_storage(surface);

        /* Now free the old memory if any. */
        HeapFree(GetProcessHeap(), 0, release);
    }
    else if (surface->flags & SFLAG_USERPTR)
    {
        /* Map and GetDC will re-create the dib section and allocated memory. */
        surface->resource.allocatedMemory = NULL;
        /* HeapMemory should be NULL already. */
        if (surface->resource.heapMemory)
            ERR("User pointer surface has heap memory allocated.\n");
        surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);

        if (surface->flags & SFLAG_CLIENT)
            surface_release_client_storage(surface);

        /* Allocate fresh system memory and mark it as the current location. */
        surface_prepare_system_memory(surface);
        surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
    }

    return WINED3D_OK;
}
1502
/* Read the contents of the surface's PBO back into heap memory, then delete
 * the PBO and clear SFLAG_PBO.
 *
 * Context activation is done by the caller. */
static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
{
    if (!surface->resource.heapMemory)
    {
        /* Over-allocate by RESOURCE_ALIGNMENT so that the pointer can be
         * rounded up to an aligned address within the allocation.
         * NOTE(review): the HeapAlloc() result is not checked before it is
         * used - confirm whether allocation failure needs handling here. */
        surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
        surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
                + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
    }

    ENTER_GL();
    GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
    checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
    /* Copy the whole buffer into the system memory copy. */
    GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
            surface->resource.size, surface->resource.allocatedMemory));
    checkGLcall("glGetBufferSubDataARB");
    GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
    checkGLcall("glDeleteBuffersARB");
    LEAVE_GL();

    surface->pbo = 0;
    surface->flags &= ~SFLAG_PBO;
}
1526
1527 /* Do not call while under the GL lock. */
1528 static void surface_unload(struct wined3d_resource *resource)
1529 {
1530     struct wined3d_surface *surface = surface_from_resource(resource);
1531     struct wined3d_renderbuffer_entry *entry, *entry2;
1532     struct wined3d_device *device = resource->device;
1533     const struct wined3d_gl_info *gl_info;
1534     struct wined3d_context *context;
1535
1536     TRACE("surface %p.\n", surface);
1537
1538     if (resource->pool == WINED3DPOOL_DEFAULT)
1539     {
1540         /* Default pool resources are supposed to be destroyed before Reset is called.
1541          * Implicit resources stay however. So this means we have an implicit render target
1542          * or depth stencil. The content may be destroyed, but we still have to tear down
1543          * opengl resources, so we cannot leave early.
1544          *
1545          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1546          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1547          * or the depth stencil into an FBO the texture or render buffer will be removed
1548          * and all flags get lost
1549          */
1550         surface_init_sysmem(surface);
1551     }
1552     else
1553     {
1554         /* Load the surface into system memory */
1555         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1556         surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
1557     }
1558     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1559     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1560     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1561
1562     context = context_acquire(device, NULL);
1563     gl_info = context->gl_info;
1564
1565     /* Destroy PBOs, but load them into real sysmem before */
1566     if (surface->flags & SFLAG_PBO)
1567         surface_remove_pbo(surface, gl_info);
1568
1569     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1570      * all application-created targets the application has to release the surface
1571      * before calling _Reset
1572      */
1573     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1574     {
1575         ENTER_GL();
1576         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1577         LEAVE_GL();
1578         list_remove(&entry->entry);
1579         HeapFree(GetProcessHeap(), 0, entry);
1580     }
1581     list_init(&surface->renderbuffers);
1582     surface->current_renderbuffer = NULL;
1583
1584     /* If we're in a texture, the texture name belongs to the texture.
1585      * Otherwise, destroy it. */
1586     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1587     {
1588         ENTER_GL();
1589         glDeleteTextures(1, &surface->texture_name);
1590         surface->texture_name = 0;
1591         glDeleteTextures(1, &surface->texture_name_srgb);
1592         surface->texture_name_srgb = 0;
1593         LEAVE_GL();
1594     }
1595
1596     context_release(context);
1597
1598     resource_unload(resource);
1599 }
1600
/* Resource operations table for surfaces. The initializer is positional;
 * the single entry set here is the unload handler, surface_unload(). */
static const struct wined3d_resource_ops surface_resource_ops =
{
    surface_unload,
};
1605
/* Surface operations table for the non-GDI surface implementation.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of struct wined3d_surface_ops (declared outside this file section).
 * gdi_surface_ops lists the GDI counterparts in the same slot order. */
static const struct wined3d_surface_ops surface_ops =
{
    surface_private_setup,
    surface_cleanup,
    surface_realize_palette,
    surface_draw_overlay,
    surface_preload,
    surface_map,
    surface_unmap,
    surface_getdc,
    surface_flip,
    surface_blt,
    surface_bltfast,
    surface_set_mem,
};
1621
1622 /*****************************************************************************
1623  * Initializes the GDI surface, aka creates the DIB section we render to
1624  * The DIB section creation is done by calling GetDC, which will create the
1625  * section and releasing the dc to allow the app to use it. The dib section
1626  * will stay until the surface is released
1627  *
1628  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1629  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1630  * avoid confusion in the shared surface code.
1631  *
1632  * Returns:
1633  *  WINED3D_OK on success
1634  *  The return values of called methods on failure
1635  *
1636  *****************************************************************************/
1637 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1638 {
1639     HRESULT hr;
1640
1641     TRACE("surface %p.\n", surface);
1642
1643     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1644     {
1645         ERR("Overlays not yet supported by GDI surfaces.\n");
1646         return WINED3DERR_INVALIDCALL;
1647     }
1648
1649     /* Sysmem textures have memory already allocated - release it,
1650      * this avoids an unnecessary memcpy. */
1651     hr = surface_create_dib_section(surface);
1652     if (SUCCEEDED(hr))
1653     {
1654         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1655         surface->resource.heapMemory = NULL;
1656         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1657     }
1658
1659     /* We don't mind the nonpow2 stuff in GDI. */
1660     surface->pow2Width = surface->resource.width;
1661     surface->pow2Height = surface->resource.height;
1662
1663     return WINED3D_OK;
1664 }
1665
1666 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1667 {
1668     TRACE("surface %p.\n", surface);
1669
1670     if (surface->flags & SFLAG_DIBSECTION)
1671     {
1672         /* Release the DC. */
1673         SelectObject(surface->hDC, surface->dib.holdbitmap);
1674         DeleteDC(surface->hDC);
1675         /* Release the DIB section. */
1676         DeleteObject(surface->dib.DIBsection);
1677         surface->dib.bitmap_data = NULL;
1678         surface->resource.allocatedMemory = NULL;
1679     }
1680
1681     if (surface->flags & SFLAG_USERPTR)
1682         wined3d_surface_set_mem(surface, NULL);
1683     if (surface->overlay_dest)
1684         list_remove(&surface->overlay_entry);
1685
1686     HeapFree(GetProcessHeap(), 0, surface->palette9);
1687
1688     resource_cleanup(&surface->resource);
1689 }
1690
1691 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1692 {
1693     struct wined3d_palette *palette = surface->palette;
1694
1695     TRACE("surface %p.\n", surface);
1696
1697     if (!palette) return;
1698
1699     if (surface->flags & SFLAG_DIBSECTION)
1700     {
1701         RGBQUAD col[256];
1702         unsigned int i;
1703
1704         TRACE("Updating the DC's palette.\n");
1705
1706         for (i = 0; i < 256; ++i)
1707         {
1708             col[i].rgbRed = palette->palents[i].peRed;
1709             col[i].rgbGreen = palette->palents[i].peGreen;
1710             col[i].rgbBlue = palette->palents[i].peBlue;
1711             col[i].rgbReserved = 0;
1712         }
1713         SetDIBColorTable(surface->hDC, 0, 256, col);
1714     }
1715
1716     /* Update the image because of the palette change. Some games like e.g.
1717      * Red Alert call SetEntries a lot to implement fading. */
1718     /* Tell the swapchain to update the screen. */
1719     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1720     {
1721         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1722         if (surface == swapchain->front_buffer)
1723         {
1724             x11_copy_to_screen(swapchain, NULL);
1725         }
1726     }
1727 }
1728
/* Overlay drawing is not implemented for GDI surfaces: logs a FIXME and
 * always returns E_FAIL. */
static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
{
    FIXME("GDI surfaces can't draw overlays yet.\n");
    return E_FAIL;
}
1734
/* Preloading is not supported for GDI surfaces: this only logs an error and
 * leaves the surface untouched. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);

    ERR("Preloading GDI surfaces is not supported.\n");
}
1741
1742 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1743 {
1744     TRACE("surface %p, rect %s, flags %#x.\n",
1745             surface, wine_dbgstr_rect(rect), flags);
1746
1747     if (!surface->resource.allocatedMemory)
1748     {
1749         /* This happens on gdi surfaces if the application set a user pointer
1750          * and resets it. Recreate the DIB section. */
1751         surface_create_dib_section(surface);
1752         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1753     }
1754 }
1755
1756 static void gdi_surface_unmap(struct wined3d_surface *surface)
1757 {
1758     TRACE("surface %p.\n", surface);
1759
1760     /* Tell the swapchain to update the screen. */
1761     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1762     {
1763         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1764         if (surface == swapchain->front_buffer)
1765         {
1766             x11_copy_to_screen(swapchain, &surface->lockedRect);
1767         }
1768     }
1769
1770     memset(&surface->lockedRect, 0, sizeof(RECT));
1771 }
1772
1773 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
1774 {
1775     WINED3DLOCKED_RECT lock;
1776     HRESULT hr;
1777
1778     TRACE("surface %p.\n", surface);
1779
1780     /* Should have a DIB section already. */
1781     if (!(surface->flags & SFLAG_DIBSECTION))
1782     {
1783         WARN("DC not supported on this surface\n");
1784         return WINED3DERR_INVALIDCALL;
1785     }
1786
1787     /* Map the surface. */
1788     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1789     if (FAILED(hr))
1790         ERR("Map failed, hr %#x.\n", hr);
1791
1792     return hr;
1793 }
1794
/* Flip handler for GDI surfaces: traces its arguments and reports success
 * without doing any work itself. */
static HRESULT gdi_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
{
    TRACE("surface %p, override %p.\n", surface, override);

    return WINED3D_OK;
}
1801
1802 static HRESULT gdi_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
1803         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
1804         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1805 {
1806     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1807             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
1808             flags, fx, debug_d3dtexturefiltertype(filter));
1809
1810     return surface_cpu_blt(dst_surface, dst_rect, src_surface, src_rect, flags, fx, filter);
1811 }
1812
1813 static HRESULT gdi_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1814         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
1815 {
1816     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
1817             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
1818
1819     return surface_cpu_bltfast(dst_surface, dst_x, dst_y, src_surface, src_rect, trans);
1820 }
1821
1822 static HRESULT gdi_surface_set_mem(struct wined3d_surface *surface, void *mem)
1823 {
1824     TRACE("surface %p, mem %p.\n", surface, mem);
1825
1826     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
1827     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1828     {
1829         ERR("Not supported on render targets.\n");
1830         return WINED3DERR_INVALIDCALL;
1831     }
1832
1833     if (mem && mem != surface->resource.allocatedMemory)
1834     {
1835         void *release = NULL;
1836
1837         /* Do I have to copy the old surface content? */
1838         if (surface->flags & SFLAG_DIBSECTION)
1839         {
1840             SelectObject(surface->hDC, surface->dib.holdbitmap);
1841             DeleteDC(surface->hDC);
1842             /* Release the DIB section. */
1843             DeleteObject(surface->dib.DIBsection);
1844             surface->dib.bitmap_data = NULL;
1845             surface->resource.allocatedMemory = NULL;
1846             surface->hDC = NULL;
1847             surface->flags &= ~SFLAG_DIBSECTION;
1848         }
1849         else if (!(surface->flags & SFLAG_USERPTR))
1850         {
1851             release = surface->resource.allocatedMemory;
1852         }
1853         surface->resource.allocatedMemory = mem;
1854         surface->flags |= SFLAG_USERPTR | SFLAG_INSYSMEM;
1855
1856         /* Now free the old memory, if any. */
1857         HeapFree(GetProcessHeap(), 0, release);
1858     }
1859     else if (surface->flags & SFLAG_USERPTR)
1860     {
1861         /* Map() and GetDC() will re-create the dib section and allocated memory. */
1862         surface->resource.allocatedMemory = NULL;
1863         surface->flags &= ~SFLAG_USERPTR;
1864     }
1865
1866     return WINED3D_OK;
1867 }
1868
/* Surface operations table for the GDI surface implementation.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of struct wined3d_surface_ops (declared outside this file section);
 * the slots line up one to one with those in surface_ops. */
static const struct wined3d_surface_ops gdi_surface_ops =
{
    gdi_surface_private_setup,
    surface_gdi_cleanup,
    gdi_surface_realize_palette,
    gdi_surface_draw_overlay,
    gdi_surface_preload,
    gdi_surface_map,
    gdi_surface_unmap,
    gdi_surface_getdc,
    gdi_surface_flip,
    gdi_surface_blt,
    gdi_surface_bltfast,
    gdi_surface_set_mem,
};
1884
1885 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
1886 {
1887     GLuint *name;
1888     DWORD flag;
1889
1890     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
1891
1892     if(srgb)
1893     {
1894         name = &surface->texture_name_srgb;
1895         flag = SFLAG_INSRGBTEX;
1896     }
1897     else
1898     {
1899         name = &surface->texture_name;
1900         flag = SFLAG_INTEXTURE;
1901     }
1902
1903     if (!*name && new_name)
1904     {
1905         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
1906          * surface has no texture name yet. See if we can get rid of this. */
1907         if (surface->flags & flag)
1908             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
1909         surface_modify_location(surface, flag, FALSE);
1910     }
1911
1912     *name = new_name;
1913     surface_force_reload(surface);
1914 }
1915
1916 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
1917 {
1918     TRACE("surface %p, target %#x.\n", surface, target);
1919
1920     if (surface->texture_target != target)
1921     {
1922         if (target == GL_TEXTURE_RECTANGLE_ARB)
1923         {
1924             surface->flags &= ~SFLAG_NORMCOORD;
1925         }
1926         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
1927         {
1928             surface->flags |= SFLAG_NORMCOORD;
1929         }
1930     }
1931     surface->texture_target = target;
1932     surface_force_reload(surface);
1933 }
1934
1935 /* Context activation is done by the caller. */
1936 void surface_bind(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
1937 {
1938     TRACE("surface %p, gl_info %p, srgb %#x.\n", surface, gl_info, srgb);
1939
1940     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
1941     {
1942         struct wined3d_texture *texture = surface->container.u.texture;
1943
1944         TRACE("Passing to container (%p).\n", texture);
1945         texture->texture_ops->texture_bind(texture, gl_info, srgb);
1946     }
1947     else
1948     {
1949         if (surface->texture_level)
1950         {
1951             ERR("Standalone surface %p is non-zero texture level %u.\n",
1952                     surface, surface->texture_level);
1953         }
1954
1955         if (srgb)
1956             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
1957
1958         ENTER_GL();
1959
1960         if (!surface->texture_name)
1961         {
1962             glGenTextures(1, &surface->texture_name);
1963             checkGLcall("glGenTextures");
1964
1965             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
1966
1967             glBindTexture(surface->texture_target, surface->texture_name);
1968             checkGLcall("glBindTexture");
1969             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
1970             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
1971             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
1972             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1973             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1974             checkGLcall("glTexParameteri");
1975         }
1976         else
1977         {
1978             glBindTexture(surface->texture_target, surface->texture_name);
1979             checkGLcall("glBindTexture");
1980         }
1981
1982         LEAVE_GL();
1983     }
1984 }
1985
1986 /* This function checks if the primary render target uses the 8bit paletted format. */
1987 static BOOL primary_render_target_is_p8(struct wined3d_device *device)
1988 {
1989     if (device->fb.render_targets && device->fb.render_targets[0])
1990     {
1991         struct wined3d_surface *render_target = device->fb.render_targets[0];
1992         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1993                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1994             return TRUE;
1995     }
1996     return FALSE;
1997 }
1998
1999 /* This call just downloads data, the caller is responsible for binding the
2000  * correct texture. */
2001 /* Context activation is done by the caller. */
2002 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2003 {
2004     const struct wined3d_format *format = surface->resource.format;
2005
2006     /* Only support read back of converted P8 surfaces. */
2007     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2008     {
2009         FIXME("Readback conversion not supported for format %s.\n", debug_d3dformat(format->id));
2010         return;
2011     }
2012
2013     ENTER_GL();
2014
2015     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2016     {
2017         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2018                 surface, surface->texture_level, format->glFormat, format->glType,
2019                 surface->resource.allocatedMemory);
2020
2021         if (surface->flags & SFLAG_PBO)
2022         {
2023             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2024             checkGLcall("glBindBufferARB");
2025             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2026             checkGLcall("glGetCompressedTexImageARB");
2027             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2028             checkGLcall("glBindBufferARB");
2029         }
2030         else
2031         {
2032             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2033                     surface->texture_level, surface->resource.allocatedMemory));
2034             checkGLcall("glGetCompressedTexImageARB");
2035         }
2036
2037         LEAVE_GL();
2038     }
2039     else
2040     {
2041         void *mem;
2042         GLenum gl_format = format->glFormat;
2043         GLenum gl_type = format->glType;
2044         int src_pitch = 0;
2045         int dst_pitch = 0;
2046
2047         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2048         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2049         {
2050             gl_format = GL_ALPHA;
2051             gl_type = GL_UNSIGNED_BYTE;
2052         }
2053
2054         if (surface->flags & SFLAG_NONPOW2)
2055         {
2056             unsigned char alignment = surface->resource.device->surface_alignment;
2057             src_pitch = format->byte_count * surface->pow2Width;
2058             dst_pitch = wined3d_surface_get_pitch(surface);
2059             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2060             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2061         }
2062         else
2063         {
2064             mem = surface->resource.allocatedMemory;
2065         }
2066
2067         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2068                 surface, surface->texture_level, gl_format, gl_type, mem);
2069
2070         if (surface->flags & SFLAG_PBO)
2071         {
2072             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2073             checkGLcall("glBindBufferARB");
2074
2075             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2076             checkGLcall("glGetTexImage");
2077
2078             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2079             checkGLcall("glBindBufferARB");
2080         }
2081         else
2082         {
2083             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2084             checkGLcall("glGetTexImage");
2085         }
2086         LEAVE_GL();
2087
2088         if (surface->flags & SFLAG_NONPOW2)
2089         {
2090             const BYTE *src_data;
2091             BYTE *dst_data;
2092             UINT y;
2093             /*
2094              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2095              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2096              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2097              *
2098              * We're doing this...
2099              *
2100              * instead of boxing the texture :
2101              * |<-texture width ->|  -->pow2width|   /\
2102              * |111111111111111111|              |   |
2103              * |222 Texture 222222| boxed empty  | texture height
2104              * |3333 Data 33333333|              |   |
2105              * |444444444444444444|              |   \/
2106              * -----------------------------------   |
2107              * |     boxed  empty | boxed empty  | pow2height
2108              * |                  |              |   \/
2109              * -----------------------------------
2110              *
2111              *
2112              * we're repacking the data to the expected texture width
2113              *
2114              * |<-texture width ->|  -->pow2width|   /\
2115              * |111111111111111111222222222222222|   |
2116              * |222333333333333333333444444444444| texture height
2117              * |444444                           |   |
2118              * |                                 |   \/
2119              * |                                 |   |
2120              * |            empty                | pow2height
2121              * |                                 |   \/
2122              * -----------------------------------
2123              *
2124              * == is the same as
2125              *
2126              * |<-texture width ->|    /\
2127              * |111111111111111111|
2128              * |222222222222222222|texture height
2129              * |333333333333333333|
2130              * |444444444444444444|    \/
2131              * --------------------
2132              *
2133              * this also means that any references to allocatedMemory should work with the data as if were a
2134              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2135              *
2136              * internally the texture is still stored in a boxed format so any references to textureName will
2137              * get a boxed texture with width pow2width and not a texture of width resource.width.
2138              *
2139              * Performance should not be an issue, because applications normally do not lock the surfaces when
2140              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2141              * and doesn't have to be re-read. */
2142             src_data = mem;
2143             dst_data = surface->resource.allocatedMemory;
2144             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2145             for (y = 1; y < surface->resource.height; ++y)
2146             {
2147                 /* skip the first row */
2148                 src_data += src_pitch;
2149                 dst_data += dst_pitch;
2150                 memcpy(dst_data, src_data, dst_pitch);
2151             }
2152
2153             HeapFree(GetProcessHeap(), 0, mem);
2154         }
2155     }
2156
2157     /* Surface has now been downloaded */
2158     surface->flags |= SFLAG_INSYSMEM;
2159 }
2160
2161 /* This call just uploads data, the caller is responsible for binding the
2162  * correct texture. */
2163 /* Context activation is done by the caller. */
2164 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2165         const struct wined3d_format *format, BOOL srgb, const GLvoid *data)
2166 {
2167     GLsizei width = surface->resource.width;
2168     GLsizei height = surface->resource.height;
2169     GLenum internal;
2170
2171     if (srgb)
2172     {
2173         internal = format->glGammaInternal;
2174     }
2175     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2176     {
2177         internal = format->rtInternal;
2178     }
2179     else
2180     {
2181         internal = format->glInternal;
2182     }
2183
2184     TRACE("surface %p, internal %#x, width %d, height %d, format %#x, type %#x, data %p.\n",
2185             surface, internal, width, height, format->glFormat, format->glType, data);
2186     TRACE("target %#x, level %u, resource size %u.\n",
2187             surface->texture_target, surface->texture_level, surface->resource.size);
2188
2189     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2190
2191     ENTER_GL();
2192
2193     if (surface->flags & SFLAG_PBO)
2194     {
2195         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
2196         checkGLcall("glBindBufferARB");
2197
2198         TRACE("(%p) pbo: %#x, data: %p.\n", surface, surface->pbo, data);
2199         data = NULL;
2200     }
2201
2202     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2203     {
2204         TRACE("Calling glCompressedTexSubImage2DARB.\n");
2205
2206         GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2207                 0, 0, width, height, internal, surface->resource.size, data));
2208         checkGLcall("glCompressedTexSubImage2DARB");
2209     }
2210     else
2211     {
2212         TRACE("Calling glTexSubImage2D.\n");
2213
2214         glTexSubImage2D(surface->texture_target, surface->texture_level,
2215                 0, 0, width, height, format->glFormat, format->glType, data);
2216         checkGLcall("glTexSubImage2D");
2217     }
2218
2219     if (surface->flags & SFLAG_PBO)
2220     {
2221         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2222         checkGLcall("glBindBufferARB");
2223     }
2224
2225     LEAVE_GL();
2226
2227     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2228     {
2229         struct wined3d_device *device = surface->resource.device;
2230         unsigned int i;
2231
2232         for (i = 0; i < device->context_count; ++i)
2233         {
2234             context_surface_update(device->contexts[i], surface);
2235         }
2236     }
2237 }
2238
2239 /* This call just allocates the texture, the caller is responsible for binding
2240  * the correct texture. */
2241 /* Context activation is done by the caller. */
2242 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2243         const struct wined3d_format *format, BOOL srgb)
2244 {
2245     BOOL enable_client_storage = FALSE;
2246     GLsizei width = surface->pow2Width;
2247     GLsizei height = surface->pow2Height;
2248     const BYTE *mem = NULL;
2249     GLenum internal;
2250
2251     if (srgb)
2252     {
2253         internal = format->glGammaInternal;
2254     }
2255     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2256     {
2257         internal = format->rtInternal;
2258     }
2259     else
2260     {
2261         internal = format->glInternal;
2262     }
2263
2264     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2265
2266     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2267             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2268             internal, width, height, format->glFormat, format->glType);
2269
2270     ENTER_GL();
2271
2272     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2273     {
2274         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2275                 || !surface->resource.allocatedMemory)
2276         {
2277             /* In some cases we want to disable client storage.
2278              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2279              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2280              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2281              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2282              */
2283             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2284             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2285             surface->flags &= ~SFLAG_CLIENT;
2286             enable_client_storage = TRUE;
2287         }
2288         else
2289         {
2290             surface->flags |= SFLAG_CLIENT;
2291
2292             /* Point OpenGL to our allocated texture memory. Do not use
2293              * resource.allocatedMemory here because it might point into a
2294              * PBO. Instead use heapMemory, but get the alignment right. */
2295             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2296                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2297         }
2298     }
2299
2300     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2301     {
2302         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2303                 internal, width, height, 0, surface->resource.size, mem));
2304         checkGLcall("glCompressedTexImage2DARB");
2305     }
2306     else
2307     {
2308         glTexImage2D(surface->texture_target, surface->texture_level,
2309                 internal, width, height, 0, format->glFormat, format->glType, mem);
2310         checkGLcall("glTexImage2D");
2311     }
2312
2313     if(enable_client_storage) {
2314         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2315         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2316     }
2317     LEAVE_GL();
2318 }
2319
2320 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2321  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2322 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2323 /* GL locking is done by the caller */
2324 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, struct wined3d_surface *rt)
2325 {
2326     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2327     struct wined3d_renderbuffer_entry *entry;
2328     GLuint renderbuffer = 0;
2329     unsigned int src_width, src_height;
2330     unsigned int width, height;
2331
2332     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2333     {
2334         width = rt->pow2Width;
2335         height = rt->pow2Height;
2336     }
2337     else
2338     {
2339         width = surface->pow2Width;
2340         height = surface->pow2Height;
2341     }
2342
2343     src_width = surface->pow2Width;
2344     src_height = surface->pow2Height;
2345
2346     /* A depth stencil smaller than the render target is not valid */
2347     if (width > src_width || height > src_height) return;
2348
2349     /* Remove any renderbuffer set if the sizes match */
2350     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2351             || (width == src_width && height == src_height))
2352     {
2353         surface->current_renderbuffer = NULL;
2354         return;
2355     }
2356
2357     /* Look if we've already got a renderbuffer of the correct dimensions */
2358     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2359     {
2360         if (entry->width == width && entry->height == height)
2361         {
2362             renderbuffer = entry->id;
2363             surface->current_renderbuffer = entry;
2364             break;
2365         }
2366     }
2367
2368     if (!renderbuffer)
2369     {
2370         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2371         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2372         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2373                 surface->resource.format->glInternal, width, height);
2374
2375         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2376         entry->width = width;
2377         entry->height = height;
2378         entry->id = renderbuffer;
2379         list_add_head(&surface->renderbuffers, &entry->entry);
2380
2381         surface->current_renderbuffer = entry;
2382     }
2383
2384     checkGLcall("set_compatible_renderbuffer");
2385 }
2386
2387 GLenum surface_get_gl_buffer(struct wined3d_surface *surface)
2388 {
2389     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2390
2391     TRACE("surface %p.\n", surface);
2392
2393     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2394     {
2395         ERR("Surface %p is not on a swapchain.\n", surface);
2396         return GL_NONE;
2397     }
2398
2399     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2400     {
2401         if (swapchain->render_to_fbo)
2402         {
2403             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2404             return GL_COLOR_ATTACHMENT0;
2405         }
2406         TRACE("Returning GL_BACK\n");
2407         return GL_BACK;
2408     }
2409     else if (surface == swapchain->front_buffer)
2410     {
2411         TRACE("Returning GL_FRONT\n");
2412         return GL_FRONT;
2413     }
2414
2415     FIXME("Higher back buffer, returning GL_BACK\n");
2416     return GL_BACK;
2417 }
2418
2419 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2420 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2421 {
2422     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2423
2424     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2425         /* No partial locking for textures yet. */
2426         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2427
2428     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2429     if (dirty_rect)
2430     {
2431         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2432         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2433         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2434         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2435     }
2436     else
2437     {
2438         surface->dirtyRect.left = 0;
2439         surface->dirtyRect.top = 0;
2440         surface->dirtyRect.right = surface->resource.width;
2441         surface->dirtyRect.bottom = surface->resource.height;
2442     }
2443
2444     /* if the container is a texture then mark it dirty. */
2445     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2446     {
2447         TRACE("Passing to container.\n");
2448         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2449     }
2450 }
2451
2452 static BOOL surface_convert_color_to_float(struct wined3d_surface *surface,
2453         DWORD color, WINED3DCOLORVALUE *float_color)
2454 {
2455     const struct wined3d_format *format = surface->resource.format;
2456     struct wined3d_device *device = surface->resource.device;
2457
2458     switch (format->id)
2459     {
2460         case WINED3DFMT_P8_UINT:
2461             if (surface->palette)
2462             {
2463                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
2464                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
2465                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
2466             }
2467             else
2468             {
2469                 float_color->r = 0.0f;
2470                 float_color->g = 0.0f;
2471                 float_color->b = 0.0f;
2472             }
2473             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
2474             break;
2475
2476         case WINED3DFMT_B5G6R5_UNORM:
2477             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
2478             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
2479             float_color->b = (color & 0x1f) / 31.0f;
2480             float_color->a = 1.0f;
2481             break;
2482
2483         case WINED3DFMT_B8G8R8_UNORM:
2484         case WINED3DFMT_B8G8R8X8_UNORM:
2485             float_color->r = D3DCOLOR_R(color);
2486             float_color->g = D3DCOLOR_G(color);
2487             float_color->b = D3DCOLOR_B(color);
2488             float_color->a = 1.0f;
2489             break;
2490
2491         case WINED3DFMT_B8G8R8A8_UNORM:
2492             float_color->r = D3DCOLOR_R(color);
2493             float_color->g = D3DCOLOR_G(color);
2494             float_color->b = D3DCOLOR_B(color);
2495             float_color->a = D3DCOLOR_A(color);
2496             break;
2497
2498         default:
2499             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
2500             return FALSE;
2501     }
2502
2503     return TRUE;
2504 }
2505
2506 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2507 {
2508     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2509
2510     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2511
2512     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2513     {
2514         ERR("Not supported on scratch surfaces.\n");
2515         return WINED3DERR_INVALIDCALL;
2516     }
2517
2518     if (!(surface->flags & flag))
2519     {
2520         TRACE("Reloading because surface is dirty\n");
2521     }
2522     /* Reload if either the texture and sysmem have different ideas about the
2523      * color key, or the actual key values changed. */
2524     else if (!(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2525             || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2526             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2527             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2528     {
2529         TRACE("Reloading because of color keying\n");
2530         /* To perform the color key conversion we need a sysmem copy of
2531          * the surface. Make sure we have it. */
2532
2533         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2534         /* Make sure the texture is reloaded because of the color key change,
2535          * this kills performance though :( */
2536         /* TODO: This is not necessarily needed with hw palettized texture support. */
2537         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2538     }
2539     else
2540     {
2541         TRACE("surface is already in texture\n");
2542         return WINED3D_OK;
2543     }
2544
2545     /* No partial locking for textures yet. */
2546     surface_load_location(surface, flag, NULL);
2547     surface_evict_sysmem(surface);
2548
2549     return WINED3D_OK;
2550 }
2551
/* Convert a 32 bit float to a 16 bit (half precision) float, i.e. 1 sign bit,
 * 5 exponent bits with a bias of 15, and 10 mantissa bits.
 *
 * Normalized results are rounded to nearest, with ties away from zero.
 * Values too large for a half become infinity, values too small become
 * denormals or zero. NaN is returned as 0x7c01, and both +0.0 and -0.0 as
 * 0x0000.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), so that its integer part is the
     * 11 bit mantissa, including the implicit leading one. */
    if (tmp < 1024.0f)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < 1024.0f);
    }
    else if (tmp >= 2048.0f)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= 2048.0f);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding may carry out of the 11 bit mantissa. Renormalize, or the
     * mask below would drop the carry and return about half the value. */
    if (mantissa == 0x800)
    {
        mantissa = 0x400;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2614
2615 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2616 {
2617     ULONG refcount;
2618
2619     TRACE("Surface %p, container %p of type %#x.\n",
2620             surface, surface->container.u.base, surface->container.type);
2621
2622     switch (surface->container.type)
2623     {
2624         case WINED3D_CONTAINER_TEXTURE:
2625             return wined3d_texture_incref(surface->container.u.texture);
2626
2627         case WINED3D_CONTAINER_SWAPCHAIN:
2628             return wined3d_swapchain_incref(surface->container.u.swapchain);
2629
2630         default:
2631             ERR("Unhandled container type %#x.\n", surface->container.type);
2632         case WINED3D_CONTAINER_NONE:
2633             break;
2634     }
2635
2636     refcount = InterlockedIncrement(&surface->resource.ref);
2637     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2638
2639     return refcount;
2640 }
2641
2642 /* Do not call while under the GL lock. */
2643 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2644 {
2645     ULONG refcount;
2646
2647     TRACE("Surface %p, container %p of type %#x.\n",
2648             surface, surface->container.u.base, surface->container.type);
2649
2650     switch (surface->container.type)
2651     {
2652         case WINED3D_CONTAINER_TEXTURE:
2653             return wined3d_texture_decref(surface->container.u.texture);
2654
2655         case WINED3D_CONTAINER_SWAPCHAIN:
2656             return wined3d_swapchain_decref(surface->container.u.swapchain);
2657
2658         default:
2659             ERR("Unhandled container type %#x.\n", surface->container.type);
2660         case WINED3D_CONTAINER_NONE:
2661             break;
2662     }
2663
2664     refcount = InterlockedDecrement(&surface->resource.ref);
2665     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2666
2667     if (!refcount)
2668     {
2669         surface->surface_ops->surface_cleanup(surface);
2670         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2671
2672         TRACE("Destroyed surface %p.\n", surface);
2673         HeapFree(GetProcessHeap(), 0, surface);
2674     }
2675
2676     return refcount;
2677 }
2678
2679 HRESULT CDECL wined3d_surface_set_private_data(struct wined3d_surface *surface,
2680         REFGUID riid, const void *data, DWORD data_size, DWORD flags)
2681 {
2682     return resource_set_private_data(&surface->resource, riid, data, data_size, flags);
2683 }
2684
2685 HRESULT CDECL wined3d_surface_get_private_data(const struct wined3d_surface *surface,
2686         REFGUID guid, void *data, DWORD *data_size)
2687 {
2688     return resource_get_private_data(&surface->resource, guid, data, data_size);
2689 }
2690
2691 HRESULT CDECL wined3d_surface_free_private_data(struct wined3d_surface *surface, REFGUID refguid)
2692 {
2693     return resource_free_private_data(&surface->resource, refguid);
2694 }
2695
2696 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2697 {
2698     return resource_set_priority(&surface->resource, priority);
2699 }
2700
2701 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2702 {
2703     return resource_get_priority(&surface->resource);
2704 }
2705
2706 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2707 {
2708     TRACE("surface %p.\n", surface);
2709
2710     surface->surface_ops->surface_preload(surface);
2711 }
2712
2713 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2714 {
2715     TRACE("surface %p.\n", surface);
2716
2717     return surface->resource.parent;
2718 }
2719
2720 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2721 {
2722     TRACE("surface %p.\n", surface);
2723
2724     return &surface->resource;
2725 }
2726
2727 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2728 {
2729     TRACE("surface %p, flags %#x.\n", surface, flags);
2730
2731     switch (flags)
2732     {
2733         case WINEDDGBS_CANBLT:
2734         case WINEDDGBS_ISBLTDONE:
2735             return WINED3D_OK;
2736
2737         default:
2738             return WINED3DERR_INVALIDCALL;
2739     }
2740 }
2741
2742 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2743 {
2744     TRACE("surface %p, flags %#x.\n", surface, flags);
2745
2746     /* XXX: DDERR_INVALIDSURFACETYPE */
2747
2748     switch (flags)
2749     {
2750         case WINEDDGFS_CANFLIP:
2751         case WINEDDGFS_ISFLIPDONE:
2752             return WINED3D_OK;
2753
2754         default:
2755             return WINED3DERR_INVALIDCALL;
2756     }
2757 }
2758
2759 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2760 {
2761     TRACE("surface %p.\n", surface);
2762
2763     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2764     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2765 }
2766
2767 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2768 {
2769     TRACE("surface %p.\n", surface);
2770
2771     /* So far we don't lose anything :) */
2772     surface->flags &= ~SFLAG_LOST;
2773     return WINED3D_OK;
2774 }
2775
2776 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2777 {
2778     TRACE("surface %p, palette %p.\n", surface, palette);
2779
2780     if (surface->palette == palette)
2781     {
2782         TRACE("Nop palette change.\n");
2783         return WINED3D_OK;
2784     }
2785
2786     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2787         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2788
2789     surface->palette = palette;
2790
2791     if (palette)
2792     {
2793         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2794             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2795
2796         surface->surface_ops->surface_realize_palette(surface);
2797     }
2798
2799     return WINED3D_OK;
2800 }
2801
2802 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
2803         DWORD flags, const WINEDDCOLORKEY *color_key)
2804 {
2805     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
2806
2807     if (flags & WINEDDCKEY_COLORSPACE)
2808     {
2809         FIXME(" colorkey value not supported (%08x) !\n", flags);
2810         return WINED3DERR_INVALIDCALL;
2811     }
2812
2813     /* Dirtify the surface, but only if a key was changed. */
2814     if (color_key)
2815     {
2816         switch (flags & ~WINEDDCKEY_COLORSPACE)
2817         {
2818             case WINEDDCKEY_DESTBLT:
2819                 surface->DestBltCKey = *color_key;
2820                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
2821                 break;
2822
2823             case WINEDDCKEY_DESTOVERLAY:
2824                 surface->DestOverlayCKey = *color_key;
2825                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
2826                 break;
2827
2828             case WINEDDCKEY_SRCOVERLAY:
2829                 surface->SrcOverlayCKey = *color_key;
2830                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
2831                 break;
2832
2833             case WINEDDCKEY_SRCBLT:
2834                 surface->SrcBltCKey = *color_key;
2835                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
2836                 break;
2837         }
2838     }
2839     else
2840     {
2841         switch (flags & ~WINEDDCKEY_COLORSPACE)
2842         {
2843             case WINEDDCKEY_DESTBLT:
2844                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
2845                 break;
2846
2847             case WINEDDCKEY_DESTOVERLAY:
2848                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
2849                 break;
2850
2851             case WINEDDCKEY_SRCOVERLAY:
2852                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
2853                 break;
2854
2855             case WINEDDCKEY_SRCBLT:
2856                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
2857                 break;
2858         }
2859     }
2860
2861     return WINED3D_OK;
2862 }
2863
2864 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
2865 {
2866     TRACE("surface %p.\n", surface);
2867
2868     return surface->palette;
2869 }
2870
2871 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
2872 {
2873     const struct wined3d_format *format = surface->resource.format;
2874     DWORD pitch;
2875
2876     TRACE("surface %p.\n", surface);
2877
2878     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
2879     {
2880         /* Since compressed formats are block based, pitch means the amount of
2881          * bytes to the next row of block rather than the next row of pixels. */
2882         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
2883         pitch = row_block_count * format->block_byte_count;
2884     }
2885     else
2886     {
2887         unsigned char alignment = surface->resource.device->surface_alignment;
2888         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
2889         pitch = (pitch + alignment - 1) & ~(alignment - 1);
2890     }
2891
2892     TRACE("Returning %u.\n", pitch);
2893
2894     return pitch;
2895 }
2896
2897 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
2898 {
2899     TRACE("surface %p, mem %p.\n", surface, mem);
2900
2901     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
2902     {
2903         WARN("Surface is locked or the DC is in use.\n");
2904         return WINED3DERR_INVALIDCALL;
2905     }
2906
2907     return surface->surface_ops->surface_set_mem(surface, mem);
2908 }
2909
2910 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
2911 {
2912     LONG w, h;
2913
2914     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
2915
2916     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2917     {
2918         WARN("Not an overlay surface.\n");
2919         return WINEDDERR_NOTAOVERLAYSURFACE;
2920     }
2921
2922     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
2923     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
2924     surface->overlay_destrect.left = x;
2925     surface->overlay_destrect.top = y;
2926     surface->overlay_destrect.right = x + w;
2927     surface->overlay_destrect.bottom = y + h;
2928
2929     surface->surface_ops->surface_draw_overlay(surface);
2930
2931     return WINED3D_OK;
2932 }
2933
2934 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
2935 {
2936     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
2937
2938     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2939     {
2940         TRACE("Not an overlay surface.\n");
2941         return WINEDDERR_NOTAOVERLAYSURFACE;
2942     }
2943
2944     if (!surface->overlay_dest)
2945     {
2946         TRACE("Overlay not visible.\n");
2947         *x = 0;
2948         *y = 0;
2949         return WINEDDERR_OVERLAYNOTVISIBLE;
2950     }
2951
2952     *x = surface->overlay_destrect.left;
2953     *y = surface->overlay_destrect.top;
2954
2955     TRACE("Returning position %d, %d.\n", *x, *y);
2956
2957     return WINED3D_OK;
2958 }
2959
2960 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
2961         DWORD flags, struct wined3d_surface *ref)
2962 {
2963     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
2964
2965     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2966     {
2967         TRACE("Not an overlay surface.\n");
2968         return WINEDDERR_NOTAOVERLAYSURFACE;
2969     }
2970
2971     return WINED3D_OK;
2972 }
2973
2974 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
2975         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
2976 {
2977     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
2978             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
2979
2980     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
2981     {
2982         WARN("Not an overlay surface.\n");
2983         return WINEDDERR_NOTAOVERLAYSURFACE;
2984     }
2985     else if (!dst_surface)
2986     {
2987         WARN("Dest surface is NULL.\n");
2988         return WINED3DERR_INVALIDCALL;
2989     }
2990
2991     if (src_rect)
2992     {
2993         surface->overlay_srcrect = *src_rect;
2994     }
2995     else
2996     {
2997         surface->overlay_srcrect.left = 0;
2998         surface->overlay_srcrect.top = 0;
2999         surface->overlay_srcrect.right = surface->resource.width;
3000         surface->overlay_srcrect.bottom = surface->resource.height;
3001     }
3002
3003     if (dst_rect)
3004     {
3005         surface->overlay_destrect = *dst_rect;
3006     }
3007     else
3008     {
3009         surface->overlay_destrect.left = 0;
3010         surface->overlay_destrect.top = 0;
3011         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3012         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3013     }
3014
3015     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3016     {
3017         list_remove(&surface->overlay_entry);
3018     }
3019
3020     if (flags & WINEDDOVER_SHOW)
3021     {
3022         if (surface->overlay_dest != dst_surface)
3023         {
3024             surface->overlay_dest = dst_surface;
3025             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3026         }
3027     }
3028     else if (flags & WINEDDOVER_HIDE)
3029     {
3030         /* tests show that the rectangles are erased on hide */
3031         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3032         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3033         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3034         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3035         surface->overlay_dest = NULL;
3036     }
3037
3038     surface->surface_ops->surface_draw_overlay(surface);
3039
3040     return WINED3D_OK;
3041 }
3042
3043 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3044 {
3045     TRACE("surface %p, clipper %p.\n", surface, clipper);
3046
3047     surface->clipper = clipper;
3048
3049     return WINED3D_OK;
3050 }
3051
3052 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3053 {
3054     TRACE("surface %p.\n", surface);
3055
3056     return surface->clipper;
3057 }
3058
3059 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3060 {
3061     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3062
3063     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3064
3065     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3066     {
3067         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3068         return WINED3DERR_INVALIDCALL;
3069     }
3070
3071     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3072             surface->pow2Width, surface->pow2Height);
3073     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3074     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
3075     surface->resource.format = format;
3076
3077     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3078     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3079             format->glFormat, format->glInternal, format->glType);
3080
3081     return WINED3D_OK;
3082 }
3083
3084 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3085         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3086 {
3087     unsigned short *dst_s;
3088     const float *src_f;
3089     unsigned int x, y;
3090
3091     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3092
3093     for (y = 0; y < h; ++y)
3094     {
3095         src_f = (const float *)(src + y * pitch_in);
3096         dst_s = (unsigned short *) (dst + y * pitch_out);
3097         for (x = 0; x < w; ++x)
3098         {
3099             dst_s[x] = float_32_to_16(src_f + x);
3100         }
3101     }
3102 }
3103
3104 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3105         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3106 {
3107     static const unsigned char convert_5to8[] =
3108     {
3109         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3110         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3111         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3112         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3113     };
3114     static const unsigned char convert_6to8[] =
3115     {
3116         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3117         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3118         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3119         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3120         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3121         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3122         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3123         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3124     };
3125     unsigned int x, y;
3126
3127     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3128
3129     for (y = 0; y < h; ++y)
3130     {
3131         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3132         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3133         for (x = 0; x < w; ++x)
3134         {
3135             WORD pixel = src_line[x];
3136             dst_line[x] = 0xff000000
3137                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3138                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3139                     | convert_5to8[(pixel & 0x001f)];
3140         }
3141     }
3142 }
3143
3144 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3145         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3146 {
3147     unsigned int x, y;
3148
3149     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3150
3151     for (y = 0; y < h; ++y)
3152     {
3153         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3154         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3155
3156         for (x = 0; x < w; ++x)
3157         {
3158             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3159         }
3160     }
3161 }
3162
3163 static inline BYTE cliptobyte(int x)
3164 {
3165     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3166 }
3167
3168 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3169         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3170 {
3171     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3172     unsigned int x, y;
3173
3174     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3175
3176     for (y = 0; y < h; ++y)
3177     {
3178         const BYTE *src_line = src + y * pitch_in;
3179         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3180         for (x = 0; x < w; ++x)
3181         {
3182             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3183              *     C = Y - 16; D = U - 128; E = V - 128;
3184              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3185              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3186              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3187              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3188              * U and V are shared between the pixels. */
3189             if (!(x & 1)) /* For every even pixel, read new U and V. */
3190             {
3191                 d = (int) src_line[1] - 128;
3192                 e = (int) src_line[3] - 128;
3193                 r2 = 409 * e + 128;
3194                 g2 = - 100 * d - 208 * e + 128;
3195                 b2 = 516 * d + 128;
3196             }
3197             c2 = 298 * ((int) src_line[0] - 16);
3198             dst_line[x] = 0xff000000
3199                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3200                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3201                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3202                 /* Scale RGB values to 0..255 range,
3203                  * then clip them if still not in range (may be negative),
3204                  * then shift them within DWORD if necessary. */
3205             src_line += 2;
3206         }
3207     }
3208 }
3209
3210 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3211         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3212 {
3213     unsigned int x, y;
3214     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3215
3216     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3217
3218     for (y = 0; y < h; ++y)
3219     {
3220         const BYTE *src_line = src + y * pitch_in;
3221         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3222         for (x = 0; x < w; ++x)
3223         {
3224             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3225              *     C = Y - 16; D = U - 128; E = V - 128;
3226              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3227              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3228              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3229              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3230              * U and V are shared between the pixels. */
3231             if (!(x & 1)) /* For every even pixel, read new U and V. */
3232             {
3233                 d = (int) src_line[1] - 128;
3234                 e = (int) src_line[3] - 128;
3235                 r2 = 409 * e + 128;
3236                 g2 = - 100 * d - 208 * e + 128;
3237                 b2 = 516 * d + 128;
3238             }
3239             c2 = 298 * ((int) src_line[0] - 16);
3240             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3241                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3242                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3243                 /* Scale RGB values to 0..255 range,
3244                  * then clip them if still not in range (may be negative),
3245                  * then shift them within DWORD if necessary. */
3246             src_line += 2;
3247         }
3248     }
3249 }
3250
3251 struct d3dfmt_convertor_desc
3252 {
3253     enum wined3d_format_id from, to;
3254     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3255 };
3256
3257 static const struct d3dfmt_convertor_desc convertors[] =
3258 {
3259     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3260     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3261     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3262     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3263     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3264 };
3265
3266 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3267         enum wined3d_format_id to)
3268 {
3269     unsigned int i;
3270
3271     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3272     {
3273         if (convertors[i].from == from && convertors[i].to == to)
3274             return &convertors[i];
3275     }
3276
3277     return NULL;
3278 }
3279
3280 /*****************************************************************************
3281  * surface_convert_format
3282  *
3283  * Creates a duplicate of a surface in a different format. Is used by Blt to
3284  * blit between surfaces with different formats.
3285  *
3286  * Parameters
3287  *  source: Source surface
3288  *  fmt: Requested destination format
3289  *
3290  *****************************************************************************/
3291 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3292 {
3293     const struct d3dfmt_convertor_desc *conv;
3294     WINED3DLOCKED_RECT lock_src, lock_dst;
3295     struct wined3d_surface *ret = NULL;
3296     HRESULT hr;
3297
3298     conv = find_convertor(source->resource.format->id, to_fmt);
3299     if (!conv)
3300     {
3301         FIXME("Cannot find a conversion function from format %s to %s.\n",
3302                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3303         return NULL;
3304     }
3305
3306     wined3d_surface_create(source->resource.device, source->resource.width,
3307             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3308             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3309             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3310     if (!ret)
3311     {
3312         ERR("Failed to create a destination surface for conversion.\n");
3313         return NULL;
3314     }
3315
3316     memset(&lock_src, 0, sizeof(lock_src));
3317     memset(&lock_dst, 0, sizeof(lock_dst));
3318
3319     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3320     if (FAILED(hr))
3321     {
3322         ERR("Failed to lock the source surface.\n");
3323         wined3d_surface_decref(ret);
3324         return NULL;
3325     }
3326     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3327     if (FAILED(hr))
3328     {
3329         ERR("Failed to lock the destination surface.\n");
3330         wined3d_surface_unmap(source);
3331         wined3d_surface_decref(ret);
3332         return NULL;
3333     }
3334
3335     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3336             source->resource.width, source->resource.height);
3337
3338     wined3d_surface_unmap(ret);
3339     wined3d_surface_unmap(source);
3340
3341     return ret;
3342 }
3343
3344 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3345         unsigned int bpp, UINT pitch, DWORD color)
3346 {
3347     BYTE *first;
3348     int x, y;
3349
3350     /* Do first row */
3351
3352 #define COLORFILL_ROW(type) \
3353 do { \
3354     type *d = (type *)buf; \
3355     for (x = 0; x < width; ++x) \
3356         d[x] = (type)color; \
3357 } while(0)
3358
3359     switch (bpp)
3360     {
3361         case 1:
3362             COLORFILL_ROW(BYTE);
3363             break;
3364
3365         case 2:
3366             COLORFILL_ROW(WORD);
3367             break;
3368
3369         case 3:
3370         {
3371             BYTE *d = buf;
3372             for (x = 0; x < width; ++x, d += 3)
3373             {
3374                 d[0] = (color      ) & 0xFF;
3375                 d[1] = (color >>  8) & 0xFF;
3376                 d[2] = (color >> 16) & 0xFF;
3377             }
3378             break;
3379         }
3380         case 4:
3381             COLORFILL_ROW(DWORD);
3382             break;
3383
3384         default:
3385             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3386             return WINED3DERR_NOTAVAILABLE;
3387     }
3388
3389 #undef COLORFILL_ROW
3390
3391     /* Now copy first row. */
3392     first = buf;
3393     for (y = 1; y < height; ++y)
3394     {
3395         buf += pitch;
3396         memcpy(buf, first, width * bpp);
3397     }
3398
3399     return WINED3D_OK;
3400 }
3401
3402 /* Do not call while under the GL lock. */
3403 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
3404         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
3405         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
3406 {
3407     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
3408             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
3409             flags, fx, debug_d3dtexturefiltertype(filter));
3410
3411     return dst_surface->surface_ops->surface_blt(dst_surface,
3412             dst_rect, src_surface, src_rect, flags, fx, filter);
3413 }
3414
3415 /* Do not call while under the GL lock. */
3416 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
3417         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
3418 {
3419     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, trans %#x.\n",
3420             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
3421
3422     return dst_surface->surface_ops->surface_bltfast(dst_surface,
3423             dst_x, dst_y, src_surface, src_rect, trans);
3424 }
3425
3426 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3427 {
3428     TRACE("surface %p.\n", surface);
3429
3430     if (!(surface->flags & SFLAG_LOCKED))
3431     {
3432         WARN("Trying to unmap unmapped surface.\n");
3433         return WINEDDERR_NOTLOCKED;
3434     }
3435     surface->flags &= ~SFLAG_LOCKED;
3436
3437     surface->surface_ops->surface_unmap(surface);
3438
3439     return WINED3D_OK;
3440 }
3441
3442 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3443         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3444 {
3445     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3446             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3447
3448     if (surface->flags & SFLAG_LOCKED)
3449     {
3450         WARN("Surface is already mapped.\n");
3451         return WINED3DERR_INVALIDCALL;
3452     }
3453     surface->flags |= SFLAG_LOCKED;
3454
3455     if (!(surface->flags & SFLAG_LOCKABLE))
3456         WARN("Trying to lock unlockable surface.\n");
3457
3458     surface->surface_ops->surface_map(surface, rect, flags);
3459
3460     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3461
3462     if (!rect)
3463     {
3464         locked_rect->pBits = surface->resource.allocatedMemory;
3465         surface->lockedRect.left = 0;
3466         surface->lockedRect.top = 0;
3467         surface->lockedRect.right = surface->resource.width;
3468         surface->lockedRect.bottom = surface->resource.height;
3469     }
3470     else
3471     {
3472         const struct wined3d_format *format = surface->resource.format;
3473
3474         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3475         {
3476             /* Compressed textures are block based, so calculate the offset of
3477              * the block that contains the top-left pixel of the locked rectangle. */
3478             locked_rect->pBits = surface->resource.allocatedMemory
3479                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3480                     + ((rect->left / format->block_width) * format->block_byte_count);
3481         }
3482         else
3483         {
3484             locked_rect->pBits = surface->resource.allocatedMemory
3485                     + (locked_rect->Pitch * rect->top)
3486                     + (rect->left * format->byte_count);
3487         }
3488         surface->lockedRect.left = rect->left;
3489         surface->lockedRect.top = rect->top;
3490         surface->lockedRect.right = rect->right;
3491         surface->lockedRect.bottom = rect->bottom;
3492     }
3493
3494     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3495     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3496
3497     return WINED3D_OK;
3498 }
3499
3500 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3501 {
3502     HRESULT hr;
3503
3504     TRACE("surface %p, dc %p.\n", surface, dc);
3505
3506     if (surface->flags & SFLAG_USERPTR)
3507     {
3508         ERR("Not supported on surfaces with application-provided memory.\n");
3509         return WINEDDERR_NODC;
3510     }
3511
3512     /* Give more detailed info for ddraw. */
3513     if (surface->flags & SFLAG_DCINUSE)
3514         return WINEDDERR_DCALREADYCREATED;
3515
3516     /* Can't GetDC if the surface is locked. */
3517     if (surface->flags & SFLAG_LOCKED)
3518         return WINED3DERR_INVALIDCALL;
3519
3520     hr = surface->surface_ops->surface_getdc(surface);
3521     if (FAILED(hr))
3522         return hr;
3523
3524     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3525             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3526     {
3527         /* GetDC on palettized formats is unsupported in D3D9, and the method
3528          * is missing in D3D8, so this should only be used for DX <=7
3529          * surfaces (with non-device palettes). */
3530         const PALETTEENTRY *pal = NULL;
3531
3532         if (surface->palette)
3533         {
3534             pal = surface->palette->palents;
3535         }
3536         else
3537         {
3538             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3539             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3540
3541             if (dds_primary && dds_primary->palette)
3542                 pal = dds_primary->palette->palents;
3543         }
3544
3545         if (pal)
3546         {
3547             RGBQUAD col[256];
3548             unsigned int i;
3549
3550             for (i = 0; i < 256; ++i)
3551             {
3552                 col[i].rgbRed = pal[i].peRed;
3553                 col[i].rgbGreen = pal[i].peGreen;
3554                 col[i].rgbBlue = pal[i].peBlue;
3555                 col[i].rgbReserved = 0;
3556             }
3557             SetDIBColorTable(surface->hDC, 0, 256, col);
3558         }
3559     }
3560
3561     surface->flags |= SFLAG_DCINUSE;
3562
3563     *dc = surface->hDC;
3564     TRACE("Returning dc %p.\n", *dc);
3565
3566     return WINED3D_OK;
3567 }
3568
3569 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3570 {
3571     TRACE("surface %p, dc %p.\n", surface, dc);
3572
3573     if (!(surface->flags & SFLAG_DCINUSE))
3574         return WINEDDERR_NODC;
3575
3576     if (surface->hDC != dc)
3577     {
3578         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3579                 dc, surface->hDC);
3580         return WINEDDERR_NODC;
3581     }
3582
3583     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3584     {
3585         /* Copy the contents of the DIB over to the PBO. */
3586         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->dib.bitmap_size);
3587     }
3588
3589     /* We locked first, so unlock now. */
3590     wined3d_surface_unmap(surface);
3591
3592     surface->flags &= ~SFLAG_DCINUSE;
3593
3594     return WINED3D_OK;
3595 }
3596
3597 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3598 {
3599     struct wined3d_swapchain *swapchain;
3600     HRESULT hr;
3601
3602     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3603
3604     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
3605     {
3606         ERR("Flipped surface is not on a swapchain.\n");
3607         return WINEDDERR_NOTFLIPPABLE;
3608     }
3609     swapchain = surface->container.u.swapchain;
3610
3611     hr = surface->surface_ops->surface_flip(surface, override);
3612     if (FAILED(hr))
3613         return hr;
3614
3615     /* Just overwrite the swapchain presentation interval. This is ok because
3616      * only ddraw apps can call Flip, and only d3d8 and d3d9 applications
3617      * specify the presentation interval. */
3618     if (!(flags & (WINEDDFLIP_NOVSYNC | WINEDDFLIP_INTERVAL2 | WINEDDFLIP_INTERVAL3 | WINEDDFLIP_INTERVAL4)))
3619         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_ONE;
3620     else if (flags & WINEDDFLIP_NOVSYNC)
3621         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
3622     else if (flags & WINEDDFLIP_INTERVAL2)
3623         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_TWO;
3624     else if (flags & WINEDDFLIP_INTERVAL3)
3625         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_THREE;
3626     else
3627         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_FOUR;
3628
3629     return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
3630 }
3631
3632 /* Do not call while under the GL lock. */
3633 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3634 {
3635     struct wined3d_device *device = surface->resource.device;
3636
3637     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3638
3639     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3640     {
3641         struct wined3d_texture *texture = surface->container.u.texture;
3642
3643         TRACE("Passing to container (%p).\n", texture);
3644         texture->texture_ops->texture_preload(texture, srgb);
3645     }
3646     else
3647     {
3648         struct wined3d_context *context = NULL;
3649
3650         TRACE("(%p) : About to load surface\n", surface);
3651
3652         if (!device->isInDraw) context = context_acquire(device, NULL);
3653
3654         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3655                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3656         {
3657             if (palette9_changed(surface))
3658             {
3659                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3660                 /* TODO: This is not necessarily needed with hw palettized texture support */
3661                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3662                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3663                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3664             }
3665         }
3666
3667         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3668
3669         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3670         {
3671             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3672             GLclampf tmp;
3673             tmp = 0.9f;
3674             ENTER_GL();
3675             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3676             LEAVE_GL();
3677         }
3678
3679         if (context) context_release(context);
3680     }
3681 }
3682
3683 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3684 {
3685     if (!surface->resource.allocatedMemory)
3686     {
3687         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3688                 surface->resource.size + RESOURCE_ALIGNMENT);
3689         if (!surface->resource.heapMemory)
3690         {
3691             ERR("Out of memory\n");
3692             return FALSE;
3693         }
3694         surface->resource.allocatedMemory =
3695             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3696     }
3697     else
3698     {
3699         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3700     }
3701
3702     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3703
3704     return TRUE;
3705 }
3706
3707 /* Read the framebuffer back into the surface */
3708 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3709 {
3710     struct wined3d_device *device = surface->resource.device;
3711     const struct wined3d_gl_info *gl_info;
3712     struct wined3d_context *context;
3713     BYTE *mem;
3714     GLint fmt;
3715     GLint type;
3716     BYTE *row, *top, *bottom;
3717     int i;
3718     BOOL bpp;
3719     RECT local_rect;
3720     BOOL srcIsUpsideDown;
3721     GLint rowLen = 0;
3722     GLint skipPix = 0;
3723     GLint skipRow = 0;
3724
3725     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3726         static BOOL warned = FALSE;
3727         if(!warned) {
3728             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3729             warned = TRUE;
3730         }
3731         return;
3732     }
3733
3734     context = context_acquire(device, surface);
3735     context_apply_blit_state(context, device);
3736     gl_info = context->gl_info;
3737
3738     ENTER_GL();
3739
3740     /* Select the correct read buffer, and give some debug output.
3741      * There is no need to keep track of the current read buffer or reset it, every part of the code
3742      * that reads sets the read buffer as desired.
3743      */
3744     if (surface_is_offscreen(surface))
3745     {
3746         /* Mapping the primary render target which is not on a swapchain.
3747          * Read from the back buffer. */
3748         TRACE("Mapping offscreen render target.\n");
3749         glReadBuffer(device->offscreenBuffer);
3750         srcIsUpsideDown = TRUE;
3751     }
3752     else
3753     {
3754         /* Onscreen surfaces are always part of a swapchain */
3755         GLenum buffer = surface_get_gl_buffer(surface);
3756         TRACE("Mapping %#x buffer.\n", buffer);
3757         glReadBuffer(buffer);
3758         checkGLcall("glReadBuffer");
3759         srcIsUpsideDown = FALSE;
3760     }
3761
3762     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3763     if (!rect)
3764     {
3765         local_rect.left = 0;
3766         local_rect.top = 0;
3767         local_rect.right = surface->resource.width;
3768         local_rect.bottom = surface->resource.height;
3769     }
3770     else
3771     {
3772         local_rect = *rect;
3773     }
3774     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3775
3776     switch (surface->resource.format->id)
3777     {
3778         case WINED3DFMT_P8_UINT:
3779         {
3780             if (primary_render_target_is_p8(device))
3781             {
3782                 /* In case of P8 render targets the index is stored in the alpha component */
3783                 fmt = GL_ALPHA;
3784                 type = GL_UNSIGNED_BYTE;
3785                 mem = dest;
3786                 bpp = surface->resource.format->byte_count;
3787             }
3788             else
3789             {
3790                 /* GL can't return palettized data, so read ARGB pixels into a
3791                  * separate block of memory and convert them into palettized format
3792                  * in software. Slow, but if the app means to use palettized render
3793                  * targets and locks it...
3794                  *
3795                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
3796                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
3797                  * for the color channels when palettizing the colors.
3798                  */
3799                 fmt = GL_RGB;
3800                 type = GL_UNSIGNED_BYTE;
3801                 pitch *= 3;
3802                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
3803                 if (!mem)
3804                 {
3805                     ERR("Out of memory\n");
3806                     LEAVE_GL();
3807                     return;
3808                 }
3809                 bpp = surface->resource.format->byte_count * 3;
3810             }
3811         }
3812         break;
3813
3814         default:
3815             mem = dest;
3816             fmt = surface->resource.format->glFormat;
3817             type = surface->resource.format->glType;
3818             bpp = surface->resource.format->byte_count;
3819     }
3820
3821     if (surface->flags & SFLAG_PBO)
3822     {
3823         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
3824         checkGLcall("glBindBufferARB");
3825         if (mem)
3826         {
3827             ERR("mem not null for pbo -- unexpected\n");
3828             mem = NULL;
3829         }
3830     }
3831
3832     /* Save old pixel store pack state */
3833     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
3834     checkGLcall("glGetIntegerv");
3835     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
3836     checkGLcall("glGetIntegerv");
3837     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
3838     checkGLcall("glGetIntegerv");
3839
3840     /* Setup pixel store pack state -- to glReadPixels into the correct place */
3841     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
3842     checkGLcall("glPixelStorei");
3843     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
3844     checkGLcall("glPixelStorei");
3845     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
3846     checkGLcall("glPixelStorei");
3847
3848     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
3849             local_rect.right - local_rect.left,
3850             local_rect.bottom - local_rect.top,
3851             fmt, type, mem);
3852     checkGLcall("glReadPixels");
3853
3854     /* Reset previous pixel store pack state */
3855     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
3856     checkGLcall("glPixelStorei");
3857     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
3858     checkGLcall("glPixelStorei");
3859     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
3860     checkGLcall("glPixelStorei");
3861
3862     if (surface->flags & SFLAG_PBO)
3863     {
3864         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
3865         checkGLcall("glBindBufferARB");
3866
3867         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
3868          * to get a pointer to it and perform the flipping in software. This is a lot
3869          * faster than calling glReadPixels for each line. In case we want more speed
3870          * we should rerender it flipped in a FBO and read the data back from the FBO. */
3871         if (!srcIsUpsideDown)
3872         {
3873             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
3874             checkGLcall("glBindBufferARB");
3875
3876             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
3877             checkGLcall("glMapBufferARB");
3878         }
3879     }
3880
3881     /* TODO: Merge this with the palettization loop below for P8 targets */
3882     if(!srcIsUpsideDown) {
3883         UINT len, off;
3884         /* glReadPixels returns the image upside down, and there is no way to prevent this.
3885             Flip the lines in software */
3886         len = (local_rect.right - local_rect.left) * bpp;
3887         off = local_rect.left * bpp;
3888
3889         row = HeapAlloc(GetProcessHeap(), 0, len);
3890         if(!row) {
3891             ERR("Out of memory\n");
3892             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
3893                 HeapFree(GetProcessHeap(), 0, mem);
3894             LEAVE_GL();
3895             return;
3896         }
3897
3898         top = mem + pitch * local_rect.top;
3899         bottom = mem + pitch * (local_rect.bottom - 1);
3900         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
3901             memcpy(row, top + off, len);
3902             memcpy(top + off, bottom + off, len);
3903             memcpy(bottom + off, row, len);
3904             top += pitch;
3905             bottom -= pitch;
3906         }
3907         HeapFree(GetProcessHeap(), 0, row);
3908
3909         /* Unmap the temp PBO buffer */
3910         if (surface->flags & SFLAG_PBO)
3911         {
3912             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
3913             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
3914         }
3915     }
3916
3917     LEAVE_GL();
3918     context_release(context);
3919
3920     /* For P8 textures we need to perform an inverse palette lookup. This is
3921      * done by searching for a palette index which matches the RGB value.
3922      * Note this isn't guaranteed to work when there are multiple entries for
3923      * the same color but we have no choice. In case of P8 render targets,
3924      * the index is stored in the alpha component so no conversion is needed. */
3925     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
3926     {
3927         const PALETTEENTRY *pal = NULL;
3928         DWORD width = pitch / 3;
3929         int x, y, c;
3930
3931         if (surface->palette)
3932         {
3933             pal = surface->palette->palents;
3934         }
3935         else
3936         {
3937             ERR("Palette is missing, cannot perform inverse palette lookup\n");
3938             HeapFree(GetProcessHeap(), 0, mem);
3939             return;
3940         }
3941
3942         for(y = local_rect.top; y < local_rect.bottom; y++) {
3943             for(x = local_rect.left; x < local_rect.right; x++) {
3944                 /*                      start              lines            pixels      */
3945                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
3946                 const BYTE *green = blue  + 1;
3947                 const BYTE *red = green + 1;
3948
3949                 for(c = 0; c < 256; c++) {
3950                     if(*red   == pal[c].peRed   &&
3951                        *green == pal[c].peGreen &&
3952                        *blue  == pal[c].peBlue)
3953                     {
3954                         *((BYTE *) dest + y * width + x) = c;
3955                         break;
3956                     }
3957                 }
3958             }
3959         }
3960         HeapFree(GetProcessHeap(), 0, mem);
3961     }
3962 }
3963
3964 /* Read the framebuffer contents into a texture */
3965 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
3966 {
3967     struct wined3d_device *device = surface->resource.device;
3968     const struct wined3d_gl_info *gl_info;
3969     struct wined3d_context *context;
3970
3971     if (!surface_is_offscreen(surface))
3972     {
3973         /* We would need to flip onscreen surfaces, but there's no efficient
3974          * way to do that here. It makes more sense for the caller to
3975          * explicitly go through sysmem. */
3976         ERR("Not supported for onscreen targets.\n");
3977         return;
3978     }
3979
3980     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
3981      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
3982      * states in the stateblock, and no driver was found yet that had bugs in that regard.
3983      */
3984     context = context_acquire(device, surface);
3985     gl_info = context->gl_info;
3986
3987     surface_prepare_texture(surface, gl_info, srgb);
3988     surface_bind_and_dirtify(surface, gl_info, srgb);
3989
3990     TRACE("Reading back offscreen render target %p.\n", surface);
3991
3992     ENTER_GL();
3993
3994     glReadBuffer(device->offscreenBuffer);
3995     checkGLcall("glReadBuffer");
3996
3997     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
3998             0, 0, 0, 0, surface->resource.width, surface->resource.height);
3999     checkGLcall("glCopyTexSubImage2D");
4000
4001     LEAVE_GL();
4002
4003     context_release(context);
4004 }
4005
4006 /* Context activation is done by the caller. */
4007 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4008         const struct wined3d_gl_info *gl_info, BOOL srgb)
4009 {
4010     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4011     CONVERT_TYPES convert;
4012     struct wined3d_format format;
4013
4014     if (surface->flags & alloc_flag) return;
4015
4016     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4017     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4018     else surface->flags &= ~SFLAG_CONVERTED;
4019
4020     surface_bind_and_dirtify(surface, gl_info, srgb);
4021     surface_allocate_surface(surface, gl_info, &format, srgb);
4022     surface->flags |= alloc_flag;
4023 }
4024
4025 /* Context activation is done by the caller. */
4026 void surface_prepare_texture(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
4027 {
4028     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4029     {
4030         struct wined3d_texture *texture = surface->container.u.texture;
4031         UINT sub_count = texture->level_count * texture->layer_count;
4032         UINT i;
4033
4034         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4035
4036         for (i = 0; i < sub_count; ++i)
4037         {
4038             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4039             surface_prepare_texture_internal(s, gl_info, srgb);
4040         }
4041
4042         return;
4043     }
4044
4045     surface_prepare_texture_internal(surface, gl_info, srgb);
4046 }
4047
4048 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4049         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4050 {
4051     struct wined3d_device *device = surface->resource.device;
4052     UINT pitch = wined3d_surface_get_pitch(surface);
4053     const struct wined3d_gl_info *gl_info;
4054     struct wined3d_context *context;
4055     RECT local_rect;
4056     UINT w, h;
4057
4058     surface_get_rect(surface, rect, &local_rect);
4059
4060     mem += local_rect.top * pitch + local_rect.left * bpp;
4061     w = local_rect.right - local_rect.left;
4062     h = local_rect.bottom - local_rect.top;
4063
4064     /* Activate the correct context for the render target */
4065     context = context_acquire(device, surface);
4066     context_apply_blit_state(context, device);
4067     gl_info = context->gl_info;
4068
4069     ENTER_GL();
4070
4071     if (!surface_is_offscreen(surface))
4072     {
4073         GLenum buffer = surface_get_gl_buffer(surface);
4074         TRACE("Unlocking %#x buffer.\n", buffer);
4075         context_set_draw_buffer(context, buffer);
4076
4077         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4078         glPixelZoom(1.0f, -1.0f);
4079     }
4080     else
4081     {
4082         /* Primary offscreen render target */
4083         TRACE("Offscreen render target.\n");
4084         context_set_draw_buffer(context, device->offscreenBuffer);
4085
4086         glPixelZoom(1.0f, 1.0f);
4087     }
4088
4089     glRasterPos3i(local_rect.left, local_rect.top, 1);
4090     checkGLcall("glRasterPos3i");
4091
4092     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4093     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4094
4095     if (surface->flags & SFLAG_PBO)
4096     {
4097         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4098         checkGLcall("glBindBufferARB");
4099     }
4100
4101     glDrawPixels(w, h, fmt, type, mem);
4102     checkGLcall("glDrawPixels");
4103
4104     if (surface->flags & SFLAG_PBO)
4105     {
4106         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4107         checkGLcall("glBindBufferARB");
4108     }
4109
4110     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4111     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4112
4113     LEAVE_GL();
4114
4115     if (wined3d_settings.strict_draw_ordering
4116             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4117             && surface->container.u.swapchain->front_buffer == surface))
4118         wglFlush();
4119
4120     context_release(context);
4121 }
4122
4123 HRESULT d3dfmt_get_conv(struct wined3d_surface *surface, BOOL need_alpha_ck,
4124         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4125 {
4126     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4127     struct wined3d_device *device = surface->resource.device;
4128     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4129     BOOL blit_supported = FALSE;
4130
4131     /* Copy the default values from the surface. Below we might perform fixups */
4132     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4133     *format = *surface->resource.format;
4134     *convert = NO_CONVERSION;
4135
4136     /* Ok, now look if we have to do any conversion */
4137     switch (surface->resource.format->id)
4138     {
4139         case WINED3DFMT_P8_UINT:
4140             /* Below the call to blit_supported is disabled for Wine 1.2
4141              * because the function isn't operating correctly yet. At the
4142              * moment 8-bit blits are handled in software and if certain GL
4143              * extensions are around, surface conversion is performed at
4144              * upload time. The blit_supported call recognizes it as a
4145              * destination fixup. This type of upload 'fixup' and 8-bit to
4146              * 8-bit blits need to be handled by the blit_shader.
4147              * TODO: get rid of this #if 0. */
4148 #if 0
4149             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4150                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4151                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4152 #endif
4153             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4154
4155             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4156              * texturing. Further also use conversion in case of color keying.
4157              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4158              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4159              * conflicts with this.
4160              */
4161             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4162                     || colorkey_active || !use_texturing)
4163             {
4164                 format->glFormat = GL_RGBA;
4165                 format->glInternal = GL_RGBA;
4166                 format->glType = GL_UNSIGNED_BYTE;
4167                 format->conv_byte_count = 4;
4168                 if (colorkey_active)
4169                     *convert = CONVERT_PALETTED_CK;
4170                 else
4171                     *convert = CONVERT_PALETTED;
4172             }
4173             break;
4174
4175         case WINED3DFMT_B2G3R3_UNORM:
4176             /* **********************
4177                 GL_UNSIGNED_BYTE_3_3_2
4178                 ********************** */
4179             if (colorkey_active) {
4180                 /* This texture format will never be used.. So do not care about color keying
4181                     up until the point in time it will be needed :-) */
4182                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4183             }
4184             break;
4185
4186         case WINED3DFMT_B5G6R5_UNORM:
4187             if (colorkey_active)
4188             {
4189                 *convert = CONVERT_CK_565;
4190                 format->glFormat = GL_RGBA;
4191                 format->glInternal = GL_RGB5_A1;
4192                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4193                 format->conv_byte_count = 2;
4194             }
4195             break;
4196
4197         case WINED3DFMT_B5G5R5X1_UNORM:
4198             if (colorkey_active)
4199             {
4200                 *convert = CONVERT_CK_5551;
4201                 format->glFormat = GL_BGRA;
4202                 format->glInternal = GL_RGB5_A1;
4203                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4204                 format->conv_byte_count = 2;
4205             }
4206             break;
4207
4208         case WINED3DFMT_B8G8R8_UNORM:
4209             if (colorkey_active)
4210             {
4211                 *convert = CONVERT_CK_RGB24;
4212                 format->glFormat = GL_RGBA;
4213                 format->glInternal = GL_RGBA8;
4214                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4215                 format->conv_byte_count = 4;
4216             }
4217             break;
4218
4219         case WINED3DFMT_B8G8R8X8_UNORM:
4220             if (colorkey_active)
4221             {
4222                 *convert = CONVERT_RGB32_888;
4223                 format->glFormat = GL_RGBA;
4224                 format->glInternal = GL_RGBA8;
4225                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4226                 format->conv_byte_count = 4;
4227             }
4228             break;
4229
4230         default:
4231             break;
4232     }
4233
4234     return WINED3D_OK;
4235 }
4236
4237 void d3dfmt_p8_init_palette(struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4238 {
4239     struct wined3d_device *device = surface->resource.device;
4240     struct wined3d_palette *pal = surface->palette;
4241     BOOL index_in_alpha = FALSE;
4242     unsigned int i;
4243
4244     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4245      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4246      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4247      * duplicate entries. Store the color key in the unused alpha component to speed the
4248      * download up and to make conversion unneeded. */
4249     index_in_alpha = primary_render_target_is_p8(device);
4250
4251     if (!pal)
4252     {
4253         UINT dxVersion = device->wined3d->dxVersion;
4254
4255         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4256         if (dxVersion <= 7)
4257         {
4258             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4259             if (index_in_alpha)
4260             {
4261                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4262                  * there's no palette at this time. */
4263                 for (i = 0; i < 256; i++) table[i][3] = i;
4264             }
4265         }
4266         else
4267         {
4268             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4269              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4270              * capability flag is present (wine does advertise this capability) */
4271             for (i = 0; i < 256; ++i)
4272             {
4273                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4274                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4275                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4276                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4277             }
4278         }
4279     }
4280     else
4281     {
4282         TRACE("Using surface palette %p\n", pal);
4283         /* Get the surface's palette */
4284         for (i = 0; i < 256; ++i)
4285         {
4286             table[i][0] = pal->palents[i].peRed;
4287             table[i][1] = pal->palents[i].peGreen;
4288             table[i][2] = pal->palents[i].peBlue;
4289
4290             /* When index_in_alpha is set the palette index is stored in the
4291              * alpha component. In case of a readback we can then read
4292              * GL_ALPHA. Color keying is handled in BltOverride using a
4293              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4294              * color key itself is passed to glAlphaFunc in other cases the
4295              * alpha component of pixels that should be masked away is set to 0. */
4296             if (index_in_alpha)
4297             {
4298                 table[i][3] = i;
4299             }
4300             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4301                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4302             {
4303                 table[i][3] = 0x00;
4304             }
4305             else if (pal->flags & WINEDDPCAPS_ALPHA)
4306             {
4307                 table[i][3] = pal->palents[i].peFlags;
4308             }
4309             else
4310             {
4311                 table[i][3] = 0xFF;
4312             }
4313         }
4314     }
4315 }
4316
4317 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4318         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4319 {
4320     const BYTE *source;
4321     BYTE *dest;
4322     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4323
4324     switch (convert) {
4325         case NO_CONVERSION:
4326         {
4327             memcpy(dst, src, pitch * height);
4328             break;
4329         }
4330         case CONVERT_PALETTED:
4331         case CONVERT_PALETTED_CK:
4332         {
4333             BYTE table[256][4];
4334             unsigned int x, y;
4335
4336             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4337
4338             for (y = 0; y < height; y++)
4339             {
4340                 source = src + pitch * y;
4341                 dest = dst + outpitch * y;
4342                 /* This is an 1 bpp format, using the width here is fine */
4343                 for (x = 0; x < width; x++) {
4344                     BYTE color = *source++;
4345                     *dest++ = table[color][0];
4346                     *dest++ = table[color][1];
4347                     *dest++ = table[color][2];
4348                     *dest++ = table[color][3];
4349                 }
4350             }
4351         }
4352         break;
4353
4354         case CONVERT_CK_565:
4355         {
4356             /* Converting the 565 format in 5551 packed to emulate color-keying.
4357
4358               Note : in all these conversion, it would be best to average the averaging
4359                       pixels to get the color of the pixel that will be color-keyed to
4360                       prevent 'color bleeding'. This will be done later on if ever it is
4361                       too visible.
4362
4363               Note2: Nvidia documents say that their driver does not support alpha + color keying
4364                      on the same surface and disables color keying in such a case
4365             */
4366             unsigned int x, y;
4367             const WORD *Source;
4368             WORD *Dest;
4369
4370             TRACE("Color keyed 565\n");
4371
4372             for (y = 0; y < height; y++) {
4373                 Source = (const WORD *)(src + y * pitch);
4374                 Dest = (WORD *) (dst + y * outpitch);
4375                 for (x = 0; x < width; x++ ) {
4376                     WORD color = *Source++;
4377                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4378                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4379                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4380                         *Dest |= 0x0001;
4381                     Dest++;
4382                 }
4383             }
4384         }
4385         break;
4386
4387         case CONVERT_CK_5551:
4388         {
4389             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4390             unsigned int x, y;
4391             const WORD *Source;
4392             WORD *Dest;
4393             TRACE("Color keyed 5551\n");
4394             for (y = 0; y < height; y++) {
4395                 Source = (const WORD *)(src + y * pitch);
4396                 Dest = (WORD *) (dst + y * outpitch);
4397                 for (x = 0; x < width; x++ ) {
4398                     WORD color = *Source++;
4399                     *Dest = color;
4400                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4401                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4402                         *Dest |= (1 << 15);
4403                     else
4404                         *Dest &= ~(1 << 15);
4405                     Dest++;
4406                 }
4407             }
4408         }
4409         break;
4410
4411         case CONVERT_CK_RGB24:
4412         {
4413             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4414             unsigned int x, y;
4415             for (y = 0; y < height; y++)
4416             {
4417                 source = src + pitch * y;
4418                 dest = dst + outpitch * y;
4419                 for (x = 0; x < width; x++) {
4420                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4421                     DWORD dstcolor = color << 8;
4422                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4423                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4424                         dstcolor |= 0xff;
4425                     *(DWORD*)dest = dstcolor;
4426                     source += 3;
4427                     dest += 4;
4428                 }
4429             }
4430         }
4431         break;
4432
4433         case CONVERT_RGB32_888:
4434         {
4435             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4436             unsigned int x, y;
4437             for (y = 0; y < height; y++)
4438             {
4439                 source = src + pitch * y;
4440                 dest = dst + outpitch * y;
4441                 for (x = 0; x < width; x++) {
4442                     DWORD color = 0xffffff & *(const DWORD*)source;
4443                     DWORD dstcolor = color << 8;
4444                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4445                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4446                         dstcolor |= 0xff;
4447                     *(DWORD*)dest = dstcolor;
4448                     source += 4;
4449                     dest += 4;
4450                 }
4451             }
4452         }
4453         break;
4454
4455         default:
4456             ERR("Unsupported conversion type %#x.\n", convert);
4457     }
4458     return WINED3D_OK;
4459 }
4460
4461 BOOL palette9_changed(struct wined3d_surface *surface)
4462 {
4463     struct wined3d_device *device = surface->resource.device;
4464
4465     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4466             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4467     {
4468         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4469          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4470          */
4471         return FALSE;
4472     }
4473
4474     if (surface->palette9)
4475     {
4476         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4477         {
4478             return FALSE;
4479         }
4480     }
4481     else
4482     {
4483         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4484     }
4485     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4486
4487     return TRUE;
4488 }
4489
4490 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4491 {
4492     /* Flip the surface contents */
4493     /* Flip the DC */
4494     {
4495         HDC tmp;
4496         tmp = front->hDC;
4497         front->hDC = back->hDC;
4498         back->hDC = tmp;
4499     }
4500
4501     /* Flip the DIBsection */
4502     {
4503         HBITMAP tmp;
4504         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4505         tmp = front->dib.DIBsection;
4506         front->dib.DIBsection = back->dib.DIBsection;
4507         back->dib.DIBsection = tmp;
4508
4509         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4510         else front->flags &= ~SFLAG_DIBSECTION;
4511         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4512         else back->flags &= ~SFLAG_DIBSECTION;
4513     }
4514
4515     /* Flip the surface data */
4516     {
4517         void* tmp;
4518
4519         tmp = front->dib.bitmap_data;
4520         front->dib.bitmap_data = back->dib.bitmap_data;
4521         back->dib.bitmap_data = tmp;
4522
4523         tmp = front->resource.allocatedMemory;
4524         front->resource.allocatedMemory = back->resource.allocatedMemory;
4525         back->resource.allocatedMemory = tmp;
4526
4527         tmp = front->resource.heapMemory;
4528         front->resource.heapMemory = back->resource.heapMemory;
4529         back->resource.heapMemory = tmp;
4530     }
4531
4532     /* Flip the PBO */
4533     {
4534         GLuint tmp_pbo = front->pbo;
4535         front->pbo = back->pbo;
4536         back->pbo = tmp_pbo;
4537     }
4538
4539     /* client_memory should not be different, but just in case */
4540     {
4541         BOOL tmp;
4542         tmp = front->dib.client_memory;
4543         front->dib.client_memory = back->dib.client_memory;
4544         back->dib.client_memory = tmp;
4545     }
4546
4547     /* Flip the opengl texture */
4548     {
4549         GLuint tmp;
4550
4551         tmp = back->texture_name;
4552         back->texture_name = front->texture_name;
4553         front->texture_name = tmp;
4554
4555         tmp = back->texture_name_srgb;
4556         back->texture_name_srgb = front->texture_name_srgb;
4557         front->texture_name_srgb = tmp;
4558
4559         resource_unload(&back->resource);
4560         resource_unload(&front->resource);
4561     }
4562
4563     {
4564         DWORD tmp_flags = back->flags;
4565         back->flags = front->flags;
4566         front->flags = tmp_flags;
4567     }
4568 }
4569
4570 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4571  * pixel copy calls. */
4572 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4573         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4574 {
4575     struct wined3d_device *device = dst_surface->resource.device;
4576     float xrel, yrel;
4577     UINT row;
4578     struct wined3d_context *context;
4579     BOOL upsidedown = FALSE;
4580     RECT dst_rect = *dst_rect_in;
4581
4582     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4583      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4584      */
4585     if(dst_rect.top > dst_rect.bottom) {
4586         UINT tmp = dst_rect.bottom;
4587         dst_rect.bottom = dst_rect.top;
4588         dst_rect.top = tmp;
4589         upsidedown = TRUE;
4590     }
4591
4592     context = context_acquire(device, src_surface);
4593     context_apply_blit_state(context, device);
4594     surface_internal_preload(dst_surface, SRGB_RGB);
4595     ENTER_GL();
4596
4597     /* Bind the target texture */
4598     glBindTexture(dst_surface->texture_target, dst_surface->texture_name);
4599     checkGLcall("glBindTexture");
4600     if (surface_is_offscreen(src_surface))
4601     {
4602         TRACE("Reading from an offscreen target\n");
4603         upsidedown = !upsidedown;
4604         glReadBuffer(device->offscreenBuffer);
4605     }
4606     else
4607     {
4608         glReadBuffer(surface_get_gl_buffer(src_surface));
4609     }
4610     checkGLcall("glReadBuffer");
4611
4612     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4613     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4614
4615     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4616     {
4617         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4618
4619         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4620             ERR("Texture filtering not supported in direct blit\n");
4621         }
4622     }
4623     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4624             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4625     {
4626         ERR("Texture filtering not supported in direct blit\n");
4627     }
4628
4629     if (upsidedown
4630             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4631             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4632     {
4633         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4634
4635         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4636                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4637                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4638                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4639     }
4640     else
4641     {
4642         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4643         /* I have to process this row by row to swap the image,
4644          * otherwise it would be upside down, so stretching in y direction
4645          * doesn't cost extra time
4646          *
4647          * However, stretching in x direction can be avoided if not necessary
4648          */
4649         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4650             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4651             {
4652                 /* Well, that stuff works, but it's very slow.
4653                  * find a better way instead
4654                  */
4655                 UINT col;
4656
4657                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4658                 {
4659                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4660                             dst_rect.left + col /* x offset */, row /* y offset */,
4661                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4662                 }
4663             }
4664             else
4665             {
4666                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4667                         dst_rect.left /* x offset */, row /* y offset */,
4668                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4669             }
4670         }
4671     }
4672     checkGLcall("glCopyTexSubImage2D");
4673
4674     LEAVE_GL();
4675     context_release(context);
4676
4677     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4678      * path is never entered
4679      */
4680     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4681 }
4682
4683 /* Uses the hardware to stretch and flip the image */
4684 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4685         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4686 {
4687     struct wined3d_device *device = dst_surface->resource.device;
4688     struct wined3d_swapchain *src_swapchain = NULL;
4689     GLuint src, backup = 0;
4690     float left, right, top, bottom; /* Texture coordinates */
4691     UINT fbwidth = src_surface->resource.width;
4692     UINT fbheight = src_surface->resource.height;
4693     struct wined3d_context *context;
4694     GLenum drawBuffer = GL_BACK;
4695     GLenum texture_target;
4696     BOOL noBackBufferBackup;
4697     BOOL src_offscreen;
4698     BOOL upsidedown = FALSE;
4699     RECT dst_rect = *dst_rect_in;
4700
4701     TRACE("Using hwstretch blit\n");
4702     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4703     context = context_acquire(device, src_surface);
4704     context_apply_blit_state(context, device);
4705     surface_internal_preload(dst_surface, SRGB_RGB);
4706
4707     src_offscreen = surface_is_offscreen(src_surface);
4708     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4709     if (!noBackBufferBackup && !src_surface->texture_name)
4710     {
4711         /* Get it a description */
4712         surface_internal_preload(src_surface, SRGB_RGB);
4713     }
4714     ENTER_GL();
4715
4716     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4717      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4718      */
4719     if (context->aux_buffers >= 2)
4720     {
4721         /* Got more than one aux buffer? Use the 2nd aux buffer */
4722         drawBuffer = GL_AUX1;
4723     }
4724     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4725     {
4726         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4727         drawBuffer = GL_AUX0;
4728     }
4729
4730     if(noBackBufferBackup) {
4731         glGenTextures(1, &backup);
4732         checkGLcall("glGenTextures");
4733         glBindTexture(GL_TEXTURE_2D, backup);
4734         checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4735         texture_target = GL_TEXTURE_2D;
4736     } else {
4737         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4738          * we are reading from the back buffer, the backup can be used as source texture
4739          */
4740         texture_target = src_surface->texture_target;
4741         glBindTexture(texture_target, src_surface->texture_name);
4742         checkGLcall("glBindTexture(texture_target, src_surface->texture_name)");
4743         glEnable(texture_target);
4744         checkGLcall("glEnable(texture_target)");
4745
4746         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4747         src_surface->flags &= ~SFLAG_INTEXTURE;
4748     }
4749
4750     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4751      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4752      */
4753     if(dst_rect.top > dst_rect.bottom) {
4754         UINT tmp = dst_rect.bottom;
4755         dst_rect.bottom = dst_rect.top;
4756         dst_rect.top = tmp;
4757         upsidedown = TRUE;
4758     }
4759
4760     if (src_offscreen)
4761     {
4762         TRACE("Reading from an offscreen target\n");
4763         upsidedown = !upsidedown;
4764         glReadBuffer(device->offscreenBuffer);
4765     }
4766     else
4767     {
4768         glReadBuffer(surface_get_gl_buffer(src_surface));
4769     }
4770
4771     /* TODO: Only back up the part that will be overwritten */
4772     glCopyTexSubImage2D(texture_target, 0,
4773                         0, 0 /* read offsets */,
4774                         0, 0,
4775                         fbwidth,
4776                         fbheight);
4777
4778     checkGLcall("glCopyTexSubImage2D");
4779
4780     /* No issue with overriding these - the sampler is dirty due to blit usage */
4781     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4782             wined3d_gl_mag_filter(magLookup, Filter));
4783     checkGLcall("glTexParameteri");
4784     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
4785             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
4786     checkGLcall("glTexParameteri");
4787
4788     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4789         src_swapchain = src_surface->container.u.swapchain;
4790     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
4791     {
4792         src = backup ? backup : src_surface->texture_name;
4793     }
4794     else
4795     {
4796         glReadBuffer(GL_FRONT);
4797         checkGLcall("glReadBuffer(GL_FRONT)");
4798
4799         glGenTextures(1, &src);
4800         checkGLcall("glGenTextures(1, &src)");
4801         glBindTexture(GL_TEXTURE_2D, src);
4802         checkGLcall("glBindTexture(GL_TEXTURE_2D, src)");
4803
4804         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
4805          * out for power of 2 sizes
4806          */
4807         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
4808                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
4809         checkGLcall("glTexImage2D");
4810         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
4811                             0, 0 /* read offsets */,
4812                             0, 0,
4813                             fbwidth,
4814                             fbheight);
4815
4816         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
4817         checkGLcall("glTexParameteri");
4818         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
4819         checkGLcall("glTexParameteri");
4820
4821         glReadBuffer(GL_BACK);
4822         checkGLcall("glReadBuffer(GL_BACK)");
4823
4824         if(texture_target != GL_TEXTURE_2D) {
4825             glDisable(texture_target);
4826             glEnable(GL_TEXTURE_2D);
4827             texture_target = GL_TEXTURE_2D;
4828         }
4829     }
4830     checkGLcall("glEnd and previous");
4831
4832     left = src_rect->left;
4833     right = src_rect->right;
4834
4835     if (!upsidedown)
4836     {
4837         top = src_surface->resource.height - src_rect->top;
4838         bottom = src_surface->resource.height - src_rect->bottom;
4839     }
4840     else
4841     {
4842         top = src_surface->resource.height - src_rect->bottom;
4843         bottom = src_surface->resource.height - src_rect->top;
4844     }
4845
4846     if (src_surface->flags & SFLAG_NORMCOORD)
4847     {
4848         left /= src_surface->pow2Width;
4849         right /= src_surface->pow2Width;
4850         top /= src_surface->pow2Height;
4851         bottom /= src_surface->pow2Height;
4852     }
4853
4854     /* draw the source texture stretched and upside down. The correct surface is bound already */
4855     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
4856     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
4857
4858     context_set_draw_buffer(context, drawBuffer);
4859     glReadBuffer(drawBuffer);
4860
4861     glBegin(GL_QUADS);
4862         /* bottom left */
4863         glTexCoord2f(left, bottom);
4864         glVertex2i(0, 0);
4865
4866         /* top left */
4867         glTexCoord2f(left, top);
4868         glVertex2i(0, dst_rect.bottom - dst_rect.top);
4869
4870         /* top right */
4871         glTexCoord2f(right, top);
4872         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4873
4874         /* bottom right */
4875         glTexCoord2f(right, bottom);
4876         glVertex2i(dst_rect.right - dst_rect.left, 0);
4877     glEnd();
4878     checkGLcall("glEnd and previous");
4879
4880     if (texture_target != dst_surface->texture_target)
4881     {
4882         glDisable(texture_target);
4883         glEnable(dst_surface->texture_target);
4884         texture_target = dst_surface->texture_target;
4885     }
4886
4887     /* Now read the stretched and upside down image into the destination texture */
4888     glBindTexture(texture_target, dst_surface->texture_name);
4889     checkGLcall("glBindTexture");
4890     glCopyTexSubImage2D(texture_target,
4891                         0,
4892                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
4893                         0, 0, /* We blitted the image to the origin */
4894                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4895     checkGLcall("glCopyTexSubImage2D");
4896
4897     if(drawBuffer == GL_BACK) {
4898         /* Write the back buffer backup back */
4899         if(backup) {
4900             if(texture_target != GL_TEXTURE_2D) {
4901                 glDisable(texture_target);
4902                 glEnable(GL_TEXTURE_2D);
4903                 texture_target = GL_TEXTURE_2D;
4904             }
4905             glBindTexture(GL_TEXTURE_2D, backup);
4906             checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4907         }
4908         else
4909         {
4910             if (texture_target != src_surface->texture_target)
4911             {
4912                 glDisable(texture_target);
4913                 glEnable(src_surface->texture_target);
4914                 texture_target = src_surface->texture_target;
4915             }
4916             glBindTexture(src_surface->texture_target, src_surface->texture_name);
4917             checkGLcall("glBindTexture(src_surface->texture_target, src_surface->texture_name)");
4918         }
4919
4920         glBegin(GL_QUADS);
4921             /* top left */
4922             glTexCoord2f(0.0f, 0.0f);
4923             glVertex2i(0, fbheight);
4924
4925             /* bottom left */
4926             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
4927             glVertex2i(0, 0);
4928
4929             /* bottom right */
4930             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
4931                     (float)fbheight / (float)src_surface->pow2Height);
4932             glVertex2i(fbwidth, 0);
4933
4934             /* top right */
4935             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
4936             glVertex2i(fbwidth, fbheight);
4937         glEnd();
4938     }
4939     glDisable(texture_target);
4940     checkGLcall("glDisable(texture_target)");
4941
4942     /* Cleanup */
4943     if (src != src_surface->texture_name && src != backup)
4944     {
4945         glDeleteTextures(1, &src);
4946         checkGLcall("glDeleteTextures(1, &src)");
4947     }
4948     if(backup) {
4949         glDeleteTextures(1, &backup);
4950         checkGLcall("glDeleteTextures(1, &backup)");
4951     }
4952
4953     LEAVE_GL();
4954
4955     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
4956
4957     context_release(context);
4958
4959     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4960      * path is never entered
4961      */
4962     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4963 }
4964
4965 /* Front buffer coordinates are always full screen coordinates, but our GL
4966  * drawable is limited to the window's client area. The sysmem and texture
4967  * copies do have the full screen size. Note that GL has a bottom-left
4968  * origin, while D3D has a top-left origin. */
4969 void surface_translate_drawable_coords(struct wined3d_surface *surface, HWND window, RECT *rect)
4970 {
4971     UINT drawable_height;
4972
4973     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4974             && surface == surface->container.u.swapchain->front_buffer)
4975     {
4976         POINT offset = {0, 0};
4977         RECT windowsize;
4978
4979         ScreenToClient(window, &offset);
4980         OffsetRect(rect, offset.x, offset.y);
4981
4982         GetClientRect(window, &windowsize);
4983         drawable_height = windowsize.bottom - windowsize.top;
4984     }
4985     else
4986     {
4987         drawable_height = surface->resource.height;
4988     }
4989
4990     rect->top = drawable_height - rect->top;
4991     rect->bottom = drawable_height - rect->bottom;
4992 }
4993
4994 /* blit between surface locations. onscreen on different swapchains is not supported.
4995  * depth / stencil is not supported. */
4996 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
4997         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
4998         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
4999 {
5000     const struct wined3d_gl_info *gl_info;
5001     struct wined3d_context *context;
5002     RECT src_rect, dst_rect;
5003     GLenum gl_filter;
5004
5005     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
5006     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
5007             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
5008     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
5009             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
5010
5011     src_rect = *src_rect_in;
5012     dst_rect = *dst_rect_in;
5013
5014     switch (filter)
5015     {
5016         case WINED3DTEXF_LINEAR:
5017             gl_filter = GL_LINEAR;
5018             break;
5019
5020         default:
5021             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
5022         case WINED3DTEXF_NONE:
5023         case WINED3DTEXF_POINT:
5024             gl_filter = GL_NEAREST;
5025             break;
5026     }
5027
5028     if (src_location == SFLAG_INDRAWABLE && surface_is_offscreen(src_surface))
5029         src_location = SFLAG_INTEXTURE;
5030     if (dst_location == SFLAG_INDRAWABLE && surface_is_offscreen(dst_surface))
5031         dst_location = SFLAG_INTEXTURE;
5032
5033     /* Make sure the locations are up-to-date. Loading the destination
5034      * surface isn't required if the entire surface is overwritten. (And is
5035      * in fact harmful if we're being called by surface_load_location() with
5036      * the purpose of loading the destination surface.) */
5037     surface_load_location(src_surface, src_location, NULL);
5038     if (!surface_is_full_rect(dst_surface, &dst_rect))
5039         surface_load_location(dst_surface, dst_location, NULL);
5040
5041     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
5042     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
5043     else context = context_acquire(device, NULL);
5044
5045     if (!context->valid)
5046     {
5047         context_release(context);
5048         WARN("Invalid context, skipping blit.\n");
5049         return;
5050     }
5051
5052     gl_info = context->gl_info;
5053
5054     if (src_location == SFLAG_INDRAWABLE)
5055     {
5056         GLenum buffer = surface_get_gl_buffer(src_surface);
5057
5058         TRACE("Source surface %p is onscreen.\n", src_surface);
5059
5060         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
5061
5062         ENTER_GL();
5063         context_bind_fbo(context, GL_READ_FRAMEBUFFER, NULL);
5064         glReadBuffer(buffer);
5065         checkGLcall("glReadBuffer()");
5066     }
5067     else
5068     {
5069         TRACE("Source surface %p is offscreen.\n", src_surface);
5070         ENTER_GL();
5071         context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
5072         glReadBuffer(GL_COLOR_ATTACHMENT0);
5073         checkGLcall("glReadBuffer()");
5074     }
5075     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
5076     LEAVE_GL();
5077
5078     if (dst_location == SFLAG_INDRAWABLE)
5079     {
5080         GLenum buffer = surface_get_gl_buffer(dst_surface);
5081
5082         TRACE("Destination surface %p is onscreen.\n", dst_surface);
5083
5084         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5085
5086         ENTER_GL();
5087         context_bind_fbo(context, GL_DRAW_FRAMEBUFFER, NULL);
5088         context_set_draw_buffer(context, buffer);
5089     }
5090     else
5091     {
5092         TRACE("Destination surface %p is offscreen.\n", dst_surface);
5093
5094         ENTER_GL();
5095         context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
5096         context_set_draw_buffer(context, GL_COLOR_ATTACHMENT0);
5097     }
5098     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
5099
5100     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
5101     device_invalidate_state(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
5102     device_invalidate_state(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
5103     device_invalidate_state(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
5104     device_invalidate_state(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
5105
5106     glDisable(GL_SCISSOR_TEST);
5107     device_invalidate_state(device, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
5108
5109     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
5110             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
5111     checkGLcall("glBlitFramebuffer()");
5112
5113     LEAVE_GL();
5114
5115     if (wined3d_settings.strict_draw_ordering
5116             || (dst_location == SFLAG_INDRAWABLE
5117             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
5118         wglFlush();
5119
5120     context_release(context);
5121 }
5122
5123 static void surface_blt_to_drawable(struct wined3d_device *device,
5124         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5125         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5126         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5127 {
5128     struct wined3d_context *context;
5129     RECT src_rect, dst_rect;
5130
5131     src_rect = *src_rect_in;
5132     dst_rect = *dst_rect_in;
5133
5134     /* Make sure the surface is up-to-date. This should probably use
5135      * surface_load_location() and worry about the destination surface too,
5136      * unless we're overwriting it completely. */
5137     surface_internal_preload(src_surface, SRGB_RGB);
5138
5139     /* Activate the destination context, set it up for blitting */
5140     context = context_acquire(device, dst_surface);
5141     context_apply_blit_state(context, device);
5142
5143     if (!surface_is_offscreen(dst_surface))
5144         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5145
5146     device->blitter->set_shader(device->blit_priv, context->gl_info, src_surface);
5147
5148     ENTER_GL();
5149
5150     if (color_key)
5151     {
5152         glEnable(GL_ALPHA_TEST);
5153         checkGLcall("glEnable(GL_ALPHA_TEST)");
5154
5155         /* When the primary render target uses P8, the alpha component
5156          * contains the palette index. Which means that the colorkey is one of
5157          * the palette entries. In other cases pixels that should be masked
5158          * away have alpha set to 0. */
5159         if (primary_render_target_is_p8(device))
5160             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5161         else
5162             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5163         checkGLcall("glAlphaFunc");
5164     }
5165     else
5166     {
5167         glDisable(GL_ALPHA_TEST);
5168         checkGLcall("glDisable(GL_ALPHA_TEST)");
5169     }
5170
5171     draw_textured_quad(src_surface, &src_rect, &dst_rect, filter);
5172
5173     if (color_key)
5174     {
5175         glDisable(GL_ALPHA_TEST);
5176         checkGLcall("glDisable(GL_ALPHA_TEST)");
5177     }
5178
5179     LEAVE_GL();
5180
5181     /* Leave the opengl state valid for blitting */
5182     device->blitter->unset_shader(context->gl_info);
5183
5184     if (wined3d_settings.strict_draw_ordering
5185             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5186             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5187         wglFlush(); /* Flush to ensure ordering across contexts. */
5188
5189     context_release(context);
5190 }
5191
5192 /* Do not call while under the GL lock. */
5193 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5194 {
5195     struct wined3d_device *device = s->resource.device;
5196     const struct blit_shader *blitter;
5197
5198     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5199             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5200     if (!blitter)
5201     {
5202         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5203         return WINED3DERR_INVALIDCALL;
5204     }
5205
5206     return blitter->color_fill(device, s, rect, color);
5207 }
5208
5209 /* Do not call while under the GL lock. */
5210 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *DestRect,
5211         struct wined3d_surface *src_surface, const RECT *SrcRect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5212         WINED3DTEXTUREFILTERTYPE Filter)
5213 {
5214     struct wined3d_device *device = dst_surface->resource.device;
5215     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5216     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5217     RECT dst_rect, src_rect;
5218
5219     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5220             dst_surface, wine_dbgstr_rect(DestRect), src_surface, wine_dbgstr_rect(SrcRect),
5221             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5222
5223     /* Get the swapchain. One of the surfaces has to be a primary surface */
5224     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5225     {
5226         WARN("Destination is in sysmem, rejecting gl blt\n");
5227         return WINED3DERR_INVALIDCALL;
5228     }
5229
5230     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5231         dstSwapchain = dst_surface->container.u.swapchain;
5232
5233     if (src_surface)
5234     {
5235         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5236         {
5237             WARN("Src is in sysmem, rejecting gl blt\n");
5238             return WINED3DERR_INVALIDCALL;
5239         }
5240
5241         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5242             srcSwapchain = src_surface->container.u.swapchain;
5243     }
5244
5245     /* Early sort out of cases where no render target is used */
5246     if (!dstSwapchain && !srcSwapchain
5247             && src_surface != device->fb.render_targets[0]
5248             && dst_surface != device->fb.render_targets[0])
5249     {
5250         TRACE("No surface is render target, not using hardware blit.\n");
5251         return WINED3DERR_INVALIDCALL;
5252     }
5253
5254     /* No destination color keying supported */
5255     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5256     {
5257         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5258         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5259         return WINED3DERR_INVALIDCALL;
5260     }
5261
5262     surface_get_rect(dst_surface, DestRect, &dst_rect);
5263     if (src_surface) surface_get_rect(src_surface, SrcRect, &src_rect);
5264
5265     /* The only case where both surfaces on a swapchain are supported is a back buffer -> front buffer blit on the same swapchain */
5266     if (dstSwapchain && dstSwapchain == srcSwapchain && dstSwapchain->back_buffers
5267             && dst_surface == dstSwapchain->front_buffer
5268             && src_surface == dstSwapchain->back_buffers[0])
5269     {
5270         /* Half-Life does a Blt from the back buffer to the front buffer,
5271          * Full surface size, no flags... Use present instead
5272          *
5273          * This path will only be entered for d3d7 and ddraw apps, because d3d8/9 offer no way to blit TO the front buffer
5274          */
5275
5276         /* Check rects - IWineD3DDevice_Present doesn't handle them */
5277         while(1)
5278         {
5279             TRACE("Looking if a Present can be done...\n");
5280             /* Source Rectangle must be full surface */
5281             if (src_rect.left || src_rect.top
5282                     || src_rect.right != src_surface->resource.width
5283                     || src_rect.bottom != src_surface->resource.height)
5284             {
5285                 TRACE("No, Source rectangle doesn't match\n");
5286                 break;
5287             }
5288
5289             /* No stretching may occur */
5290             if(src_rect.right != dst_rect.right - dst_rect.left ||
5291                src_rect.bottom != dst_rect.bottom - dst_rect.top) {
5292                 TRACE("No, stretching is done\n");
5293                 break;
5294             }
5295
5296             /* Destination must be full surface or match the clipping rectangle */
5297             if (dst_surface->clipper && dst_surface->clipper->hWnd)
5298             {
5299                 RECT cliprect;
5300                 POINT pos[2];
5301                 GetClientRect(dst_surface->clipper->hWnd, &cliprect);
5302                 pos[0].x = dst_rect.left;
5303                 pos[0].y = dst_rect.top;
5304                 pos[1].x = dst_rect.right;
5305                 pos[1].y = dst_rect.bottom;
5306                 MapWindowPoints(GetDesktopWindow(), dst_surface->clipper->hWnd, pos, 2);
5307
5308                 if(pos[0].x != cliprect.left  || pos[0].y != cliprect.top   ||
5309                    pos[1].x != cliprect.right || pos[1].y != cliprect.bottom)
5310                 {
5311                     TRACE("No, dest rectangle doesn't match(clipper)\n");
5312                     TRACE("Clip rect at %s\n", wine_dbgstr_rect(&cliprect));
5313                     TRACE("Blt dest: %s\n", wine_dbgstr_rect(&dst_rect));
5314                     break;
5315                 }
5316             }
5317             else if (dst_rect.left || dst_rect.top
5318                     || dst_rect.right != dst_surface->resource.width
5319                     || dst_rect.bottom != dst_surface->resource.height)
5320             {
5321                 TRACE("No, dest rectangle doesn't match(surface size)\n");
5322                 break;
5323             }
5324
5325             TRACE("Yes\n");
5326
5327             /* These flags are unimportant for the flag check, remove them */
5328             if (!(flags & ~(WINEDDBLT_DONOTWAIT | WINEDDBLT_WAIT)))
5329             {
5330                 WINED3DSWAPEFFECT orig_swap = dstSwapchain->presentParms.SwapEffect;
5331
5332                 /* The idea behind this is that a glReadPixels and a glDrawPixels call
5333                     * take very long, while a flip is fast.
5334                     * This applies to Half-Life, which does such Blts every time it finished
5335                     * a frame, and to Prince of Persia 3D, which uses this to draw at least the main
5336                     * menu. This is also used by all apps when they do windowed rendering
5337                     *
5338                     * The problem is that flipping is not really the same as copying. After a
5339                     * Blt the front buffer is a copy of the back buffer, and the back buffer is
5340                     * untouched. Therefore it's necessary to override the swap effect
5341                     * and to set it back after the flip.
5342                     *
5343                     * Windowed Direct3D < 7 apps do the same. The D3D7 sdk demos are nice
5344                     * testcases.
5345                     */
5346
5347                 dstSwapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
5348                 dstSwapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
5349
5350                 TRACE("Full screen back buffer -> front buffer blt, performing a flip instead.\n");
5351                 wined3d_swapchain_present(dstSwapchain, NULL, NULL, dstSwapchain->win_handle, NULL, 0);
5352
5353                 dstSwapchain->presentParms.SwapEffect = orig_swap;
5354
5355                 return WINED3D_OK;
5356             }
5357             break;
5358         }
5359
5360         TRACE("Unsupported blit between buffers on the same swapchain\n");
5361         return WINED3DERR_INVALIDCALL;
5362     } else if(dstSwapchain && dstSwapchain == srcSwapchain) {
5363         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5364         return WINED3DERR_INVALIDCALL;
5365     } else if(dstSwapchain && srcSwapchain) {
5366         FIXME("Implement hardware blit between two different swapchains\n");
5367         return WINED3DERR_INVALIDCALL;
5368     }
5369     else if (dstSwapchain)
5370     {
5371         /* Handled with regular texture -> swapchain blit */
5372         if (src_surface == device->fb.render_targets[0])
5373             TRACE("Blit from active render target to a swapchain\n");
5374     }
5375     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5376     {
5377         FIXME("Implement blit from a swapchain to the active render target\n");
5378         return WINED3DERR_INVALIDCALL;
5379     }
5380
5381     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5382     {
5383         /* Blit from render target to texture */
5384         BOOL stretchx;
5385
5386         /* P8 read back is not implemented */
5387         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5388                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5389         {
5390             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5391             return WINED3DERR_INVALIDCALL;
5392         }
5393
5394         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5395         {
5396             TRACE("Color keying not supported by frame buffer to texture blit\n");
5397             return WINED3DERR_INVALIDCALL;
5398             /* Destination color key is checked above */
5399         }
5400
5401         if(dst_rect.right - dst_rect.left != src_rect.right - src_rect.left) {
5402             stretchx = TRUE;
5403         } else {
5404             stretchx = FALSE;
5405         }
5406
5407         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5408          * flip the image nor scale it.
5409          *
5410          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5411          * -> If the app wants a image width an unscaled width, copy it line per line
5412          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5413          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5414          *    back buffer. This is slower than reading line per line, thus not used for flipping
5415          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5416          *    pixel by pixel
5417          *
5418          * If EXT_framebuffer_blit is supported that can be used instead. Note that EXT_framebuffer_blit implies
5419          * FBO support, so it doesn't really make sense to try and make it work with different offscreen rendering
5420          * backends. */
5421         if (fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5422                 &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5423                 &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5424         {
5425             surface_blt_fbo(device, Filter,
5426                     src_surface, SFLAG_INDRAWABLE, &src_rect,
5427                     dst_surface, SFLAG_INDRAWABLE, &dst_rect);
5428             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5429         }
5430         else if (!stretchx || dst_rect.right - dst_rect.left > src_surface->resource.width
5431                 || dst_rect.bottom - dst_rect.top > src_surface->resource.height)
5432         {
5433             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5434             fb_copy_to_texture_direct(dst_surface, src_surface, &src_rect, &dst_rect, Filter);
5435         } else {
5436             TRACE("Using hardware stretching to flip / stretch the texture\n");
5437             fb_copy_to_texture_hwstretch(dst_surface, src_surface, &src_rect, &dst_rect, Filter);
5438         }
5439
5440         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5441         {
5442             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5443             dst_surface->resource.allocatedMemory = NULL;
5444             dst_surface->resource.heapMemory = NULL;
5445         }
5446         else
5447         {
5448             dst_surface->flags &= ~SFLAG_INSYSMEM;
5449         }
5450
5451         return WINED3D_OK;
5452     }
5453     else if (src_surface)
5454     {
5455         /* Blit from offscreen surface to render target */
5456         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5457         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5458
5459         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5460
5461         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5462                 && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5463                         &src_rect, src_surface->resource.usage, src_surface->resource.pool,
5464                         src_surface->resource.format,
5465                         &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5466                         dst_surface->resource.format))
5467         {
5468             TRACE("Using surface_blt_fbo.\n");
5469             /* The source is always a texture, but never the currently active render target, and the texture
5470              * contents are never upside down. */
5471             surface_blt_fbo(device, Filter,
5472                     src_surface, SFLAG_INDRAWABLE, &src_rect,
5473                     dst_surface, SFLAG_INDRAWABLE, &dst_rect);
5474             surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5475             return WINED3D_OK;
5476         }
5477
5478         if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5479                 && arbfp_blit.blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5480                         &src_rect, src_surface->resource.usage, src_surface->resource.pool,
5481                         src_surface->resource.format,
5482                         &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
5483                         dst_surface->resource.format))
5484         {
5485             return arbfp_blit_surface(device, src_surface, &src_rect, dst_surface, &dst_rect,
5486                     WINED3D_BLIT_OP_COLOR_BLIT, Filter);
5487         }
5488
5489         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5490                 &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5491                 &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5492         {
5493             FIXME("Unsupported blit operation falling back to software\n");
5494             return WINED3DERR_INVALIDCALL;
5495         }
5496
5497         /* Color keying: Check if we have to do a color keyed blt,
5498          * and if not check if a color key is activated.
5499          *
5500          * Just modify the color keying parameters in the surface and restore them afterwards
5501          * The surface keeps track of the color key last used to load the opengl surface.
5502          * PreLoad will catch the change to the flags and color key and reload if necessary.
5503          */
5504         if (flags & WINEDDBLT_KEYSRC)
5505         {
5506             /* Use color key from surface */
5507         }
5508         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5509         {
5510             /* Use color key from DDBltFx */
5511             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5512             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5513         }
5514         else
5515         {
5516             /* Do not use color key */
5517             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5518         }
5519
5520         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5521                 src_surface, &src_rect, dst_surface, &dst_rect);
5522
5523         /* Restore the color key parameters */
5524         src_surface->CKeyFlags = oldCKeyFlags;
5525         src_surface->SrcBltCKey = oldBltCKey;
5526
5527         surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5528
5529         return WINED3D_OK;
5530     }
5531     else
5532     {
5533         /* Source-Less Blit to render target */
5534         if (flags & WINEDDBLT_COLORFILL)
5535         {
5536             WINED3DCOLORVALUE color;
5537
5538             TRACE("Colorfill\n");
5539
5540             /* The color as given in the Blt function is in the surface format. */
5541             if (!surface_convert_color_to_float(dst_surface, DDBltFx->u5.dwFillColor, &color))
5542                 return WINED3DERR_INVALIDCALL;
5543
5544             return surface_color_fill(dst_surface, &dst_rect, &color);
5545         }
5546     }
5547
5548     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5549     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5550     return WINED3DERR_INVALIDCALL;
5551 }
5552
5553 /* GL locking is done by the caller */
5554 static void surface_depth_blt(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
5555         GLuint texture, GLsizei w, GLsizei h, GLenum target)
5556 {
5557     struct wined3d_device *device = surface->resource.device;
5558     GLint compare_mode = GL_NONE;
5559     struct blt_info info;
5560     GLint old_binding = 0;
5561     RECT rect;
5562
5563     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5564
5565     glDisable(GL_CULL_FACE);
5566     glDisable(GL_BLEND);
5567     glDisable(GL_ALPHA_TEST);
5568     glDisable(GL_SCISSOR_TEST);
5569     glDisable(GL_STENCIL_TEST);
5570     glEnable(GL_DEPTH_TEST);
5571     glDepthFunc(GL_ALWAYS);
5572     glDepthMask(GL_TRUE);
5573     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5574     glViewport(0, surface->pow2Height - h, w, h);
5575
5576     SetRect(&rect, 0, h, w, 0);
5577     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5578     GL_EXTCALL(glActiveTextureARB(GL_TEXTURE0_ARB));
5579     glGetIntegerv(info.binding, &old_binding);
5580     glBindTexture(info.bind_target, texture);
5581     if (gl_info->supported[ARB_SHADOW])
5582     {
5583         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5584         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5585     }
5586
5587     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5588             gl_info, info.tex_type, &surface->ds_current_size);
5589
5590     glBegin(GL_TRIANGLE_STRIP);
5591     glTexCoord3fv(info.coords[0]);
5592     glVertex2f(-1.0f, -1.0f);
5593     glTexCoord3fv(info.coords[1]);
5594     glVertex2f(1.0f, -1.0f);
5595     glTexCoord3fv(info.coords[2]);
5596     glVertex2f(-1.0f, 1.0f);
5597     glTexCoord3fv(info.coords[3]);
5598     glVertex2f(1.0f, 1.0f);
5599     glEnd();
5600
5601     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5602     glBindTexture(info.bind_target, old_binding);
5603
5604     glPopAttrib();
5605
5606     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5607 }
5608
5609 void surface_modify_ds_location(struct wined3d_surface *surface,
5610         DWORD location, UINT w, UINT h)
5611 {
5612     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5613
5614     if (location & ~SFLAG_DS_LOCATIONS)
5615         FIXME("Invalid location (%#x) specified.\n", location);
5616
5617     surface->ds_current_size.cx = w;
5618     surface->ds_current_size.cy = h;
5619     surface->flags &= ~SFLAG_DS_LOCATIONS;
5620     surface->flags |= location;
5621 }
5622
5623 /* Context activation is done by the caller. */
5624 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5625 {
5626     struct wined3d_device *device = surface->resource.device;
5627     const struct wined3d_gl_info *gl_info = context->gl_info;
5628     GLsizei w, h;
5629
5630     TRACE("surface %p, new location %#x.\n", surface, location);
5631
5632     /* TODO: Make this work for modes other than FBO */
5633     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5634
5635     if (!(surface->flags & location))
5636     {
5637         w = surface->ds_current_size.cx;
5638         h = surface->ds_current_size.cy;
5639         surface->ds_current_size.cx = 0;
5640         surface->ds_current_size.cy = 0;
5641     }
5642     else
5643     {
5644         w = surface->resource.width;
5645         h = surface->resource.height;
5646     }
5647
5648     if (surface->ds_current_size.cx == surface->resource.width
5649             && surface->ds_current_size.cy == surface->resource.height)
5650     {
5651         TRACE("Location (%#x) is already up to date.\n", location);
5652         return;
5653     }
5654
5655     if (surface->current_renderbuffer)
5656     {
5657         FIXME("Not supported with fixed up depth stencil.\n");
5658         return;
5659     }
5660
5661     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5662     {
5663         /* This mostly happens when a depth / stencil is used without being
5664          * cleared first. In principle we could upload from sysmem, or
5665          * explicitly clear before first usage. For the moment there don't
5666          * appear to be a lot of applications depending on this, so a FIXME
5667          * should do. */
5668         FIXME("No up to date depth stencil location.\n");
5669         surface->flags |= location;
5670         surface->ds_current_size.cx = surface->resource.width;
5671         surface->ds_current_size.cy = surface->resource.height;
5672         return;
5673     }
5674
5675     if (location == SFLAG_DS_OFFSCREEN)
5676     {
5677         GLint old_binding = 0;
5678         GLenum bind_target;
5679
5680         /* The render target is allowed to be smaller than the depth/stencil
5681          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5682          * than the offscreen surface. Don't overwrite the offscreen surface
5683          * with undefined data. */
5684         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5685         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5686
5687         TRACE("Copying onscreen depth buffer to depth texture.\n");
5688
5689         ENTER_GL();
5690
5691         if (!device->depth_blt_texture)
5692         {
5693             glGenTextures(1, &device->depth_blt_texture);
5694         }
5695
5696         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5697          * directly on the FBO texture. That's because we need to flip. */
5698         context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
5699         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5700         {
5701             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5702             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5703         }
5704         else
5705         {
5706             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5707             bind_target = GL_TEXTURE_2D;
5708         }
5709         glBindTexture(bind_target, device->depth_blt_texture);
5710         glCopyTexImage2D(bind_target, surface->texture_level, surface->resource.format->glInternal, 0, 0, w, h, 0);
5711         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5712         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5713         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5714         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5715         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5716         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5717         glBindTexture(bind_target, old_binding);
5718
5719         /* Setup the destination */
5720         if (!device->depth_blt_rb)
5721         {
5722             gl_info->fbo_ops.glGenRenderbuffers(1, &device->depth_blt_rb);
5723             checkGLcall("glGenRenderbuffersEXT");
5724         }
5725         if (device->depth_blt_rb_w != w || device->depth_blt_rb_h != h)
5726         {
5727             gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, device->depth_blt_rb);
5728             checkGLcall("glBindRenderbufferEXT");
5729             gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, w, h);
5730             checkGLcall("glRenderbufferStorageEXT");
5731             device->depth_blt_rb_w = w;
5732             device->depth_blt_rb_h = h;
5733         }
5734
5735         context_bind_fbo(context, GL_FRAMEBUFFER, &context->dst_fbo);
5736         gl_info->fbo_ops.glFramebufferRenderbuffer(GL_FRAMEBUFFER,
5737                 GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, device->depth_blt_rb);
5738         checkGLcall("glFramebufferRenderbufferEXT");
5739         context_attach_depth_stencil_fbo(context, GL_FRAMEBUFFER, surface, FALSE);
5740
5741         /* Do the actual blit */
5742         surface_depth_blt(surface, gl_info, device->depth_blt_texture, w, h, bind_target);
5743         checkGLcall("depth_blt");
5744
5745         if (context->current_fbo) context_bind_fbo(context, GL_FRAMEBUFFER, &context->current_fbo->id);
5746         else context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
5747
5748         LEAVE_GL();
5749
5750         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5751     }
5752     else if (location == SFLAG_DS_ONSCREEN)
5753     {
5754         TRACE("Copying depth texture to onscreen depth buffer.\n");
5755
5756         ENTER_GL();
5757
5758         context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
5759         surface_depth_blt(surface, gl_info, surface->texture_name,
5760                 w, h, surface->texture_target);
5761         checkGLcall("depth_blt");
5762
5763         if (context->current_fbo) context_bind_fbo(context, GL_FRAMEBUFFER, &context->current_fbo->id);
5764
5765         LEAVE_GL();
5766
5767         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5768     }
5769     else
5770     {
5771         ERR("Invalid location (%#x) specified.\n", location);
5772     }
5773
5774     surface->flags |= location;
5775     surface->ds_current_size.cx = surface->resource.width;
5776     surface->ds_current_size.cy = surface->resource.height;
5777 }
5778
5779 void surface_modify_location(struct wined3d_surface *surface, DWORD flag, BOOL persistent)
5780 {
5781     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5782     struct wined3d_surface *overlay;
5783
5784     TRACE("surface %p, location %s, persistent %#x.\n",
5785             surface, debug_surflocation(flag), persistent);
5786
5787     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
5788     {
5789         if (surface_is_offscreen(surface))
5790         {
5791             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
5792             if (flag & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)) flag |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
5793         }
5794         else
5795         {
5796             TRACE("Surface %p is an onscreen surface.\n", surface);
5797         }
5798     }
5799
5800     if (flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5801             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5802     {
5803         flag |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5804     }
5805
5806     if (persistent)
5807     {
5808         if (((surface->flags & SFLAG_INTEXTURE) && !(flag & SFLAG_INTEXTURE))
5809                 || ((surface->flags & SFLAG_INSRGBTEX) && !(flag & SFLAG_INSRGBTEX)))
5810         {
5811             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5812             {
5813                 TRACE("Passing to container.\n");
5814                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5815             }
5816         }
5817         surface->flags &= ~SFLAG_LOCATIONS;
5818         surface->flags |= flag;
5819
5820         /* Redraw emulated overlays, if any */
5821         if (flag & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5822         {
5823             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5824             {
5825                 overlay->surface_ops->surface_draw_overlay(overlay);
5826             }
5827         }
5828     }
5829     else
5830     {
5831         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5832         {
5833             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5834             {
5835                 TRACE("Passing to container\n");
5836                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5837             }
5838         }
5839         surface->flags &= ~flag;
5840     }
5841
5842     if (!(surface->flags & SFLAG_LOCATIONS))
5843     {
5844         ERR("Surface %p does not have any up to date location.\n", surface);
5845     }
5846 }
5847
5848 static DWORD resource_access_from_location(DWORD location)
5849 {
5850     switch (location)
5851     {
5852         case SFLAG_INSYSMEM:
5853             return WINED3D_RESOURCE_ACCESS_CPU;
5854
5855         case SFLAG_INDRAWABLE:
5856         case SFLAG_INSRGBTEX:
5857         case SFLAG_INTEXTURE:
5858             return WINED3D_RESOURCE_ACCESS_GPU;
5859
5860         default:
5861             FIXME("Unhandled location %#x.\n", location);
5862             return 0;
5863     }
5864 }
5865
5866 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD flag, const RECT *rect)
5867 {
5868     struct wined3d_device *device = surface->resource.device;
5869     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5870     BOOL drawable_read_ok = surface_is_offscreen(surface);
5871     struct wined3d_format format;
5872     CONVERT_TYPES convert;
5873     int width, pitch, outpitch;
5874     BYTE *mem;
5875     BOOL in_fbo = FALSE;
5876
5877     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(flag), wine_dbgstr_rect(rect));
5878
5879     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5880     {
5881         if (flag == SFLAG_INTEXTURE)
5882         {
5883             struct wined3d_context *context = context_acquire(device, NULL);
5884             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
5885             context_release(context);
5886             return WINED3D_OK;
5887         }
5888         else
5889         {
5890             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(flag));
5891             return WINED3DERR_INVALIDCALL;
5892         }
5893     }
5894
5895     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
5896     {
5897         if (surface_is_offscreen(surface))
5898         {
5899             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets.
5900              * Prefer SFLAG_INTEXTURE. */
5901             if (flag == SFLAG_INDRAWABLE) flag = SFLAG_INTEXTURE;
5902             drawable_read_ok = FALSE;
5903             in_fbo = TRUE;
5904         }
5905         else
5906         {
5907             TRACE("Surface %p is an onscreen surface.\n", surface);
5908         }
5909     }
5910
5911     if (flag == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5912     {
5913         flag = SFLAG_INTEXTURE;
5914     }
5915
5916     if (surface->flags & flag)
5917     {
5918         TRACE("Location already up to date\n");
5919         return WINED3D_OK;
5920     }
5921
5922     if (WARN_ON(d3d_surface))
5923     {
5924         DWORD required_access = resource_access_from_location(flag);
5925         if ((surface->resource.access_flags & required_access) != required_access)
5926             WARN("Operation requires %#x access, but surface only has %#x.\n",
5927                     required_access, surface->resource.access_flags);
5928     }
5929
5930     if (!(surface->flags & SFLAG_LOCATIONS))
5931     {
5932         ERR("Surface %p does not have any up to date location.\n", surface);
5933         surface->flags |= SFLAG_LOST;
5934         return WINED3DERR_DEVICELOST;
5935     }
5936
5937     if (flag == SFLAG_INSYSMEM)
5938     {
5939         surface_prepare_system_memory(surface);
5940
5941         /* Download the surface to system memory */
5942         if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5943         {
5944             struct wined3d_context *context = NULL;
5945
5946             if (!device->isInDraw) context = context_acquire(device, NULL);
5947
5948             surface_bind_and_dirtify(surface, gl_info, !(surface->flags & SFLAG_INTEXTURE));
5949             surface_download_data(surface, gl_info);
5950
5951             if (context) context_release(context);
5952         }
5953         else
5954         {
5955             /* Note: It might be faster to download into a texture first. */
5956             read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5957                     wined3d_surface_get_pitch(surface));
5958         }
5959     }
5960     else if (flag == SFLAG_INDRAWABLE)
5961     {
5962         if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5963             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5964
5965         if (surface->flags & SFLAG_INTEXTURE)
5966         {
5967             RECT r;
5968
5969             surface_get_rect(surface, rect, &r);
5970             surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5971         }
5972         else
5973         {
5974             int byte_count;
5975             if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5976             {
5977                 /* This needs a shader to convert the srgb data sampled from the GL texture into RGB
5978                  * values, otherwise we get incorrect values in the target. For now go the slow way
5979                  * via a system memory copy
5980                  */
5981                 surface_load_location(surface, SFLAG_INSYSMEM, rect);
5982             }
5983
5984             d3dfmt_get_conv(surface, FALSE /* We need color keying */,
5985                     FALSE /* We won't use textures */, &format, &convert);
5986
5987             /* The width is in 'length' not in bytes */
5988             width = surface->resource.width;
5989             pitch = wined3d_surface_get_pitch(surface);
5990
5991             /* Don't use PBOs for converted surfaces. During PBO conversion we look at SFLAG_CONVERTED
5992              * but it isn't set (yet) in all cases it is getting called. */
5993             if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5994             {
5995                 struct wined3d_context *context = NULL;
5996
5997                 TRACE("Removing the pbo attached to surface %p.\n", surface);
5998
5999                 if (!device->isInDraw) context = context_acquire(device, NULL);
6000                 surface_remove_pbo(surface, gl_info);
6001                 if (context) context_release(context);
6002             }
6003
6004             if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
6005             {
6006                 int height = surface->resource.height;
6007                 byte_count = format.conv_byte_count;
6008
6009                 /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6010                 outpitch = width * byte_count;
6011                 outpitch = (outpitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6012
6013                 mem = HeapAlloc(GetProcessHeap(), 0, outpitch * height);
6014                 if(!mem) {
6015                     ERR("Out of memory %d, %d!\n", outpitch, height);
6016                     return WINED3DERR_OUTOFVIDEOMEMORY;
6017                 }
6018                 d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, pitch,
6019                         width, height, outpitch, convert, surface);
6020
6021                 surface->flags |= SFLAG_CONVERTED;
6022             }
6023             else
6024             {
6025                 surface->flags &= ~SFLAG_CONVERTED;
6026                 mem = surface->resource.allocatedMemory;
6027                 byte_count = format.byte_count;
6028             }
6029
6030             flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
6031
6032             /* Don't delete PBO memory */
6033             if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6034                 HeapFree(GetProcessHeap(), 0, mem);
6035         }
6036     }
6037     else /* if(flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) */
6038     {
6039         const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
6040
6041         if (drawable_read_ok && (surface->flags & SFLAG_INDRAWABLE))
6042         {
6043             read_from_framebuffer_texture(surface, flag == SFLAG_INSRGBTEX);
6044         }
6045         else if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6046                 && (surface->resource.format->flags & attach_flags) == attach_flags
6047                 && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6048                         NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6049                         NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6050         {
6051             DWORD src_location = flag == SFLAG_INSRGBTEX ? SFLAG_INTEXTURE : SFLAG_INSRGBTEX;
6052             RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6053
6054             surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6055                     surface, src_location, &rect, surface, flag, &rect);
6056         }
6057         else
6058         {
6059             /* Upload from system memory */
6060             BOOL srgb = flag == SFLAG_INSRGBTEX;
6061             struct wined3d_context *context = NULL;
6062
6063             d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6064                     TRUE /* We will use textures */, &format, &convert);
6065
6066             if (srgb)
6067             {
6068                 if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6069                 {
6070                     /* Performance warning... */
6071                     FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6072                     surface_load_location(surface, SFLAG_INSYSMEM, rect);
6073                 }
6074             }
6075             else
6076             {
6077                 if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6078                 {
6079                     /* Performance warning... */
6080                     FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6081                     surface_load_location(surface, SFLAG_INSYSMEM, rect);
6082                 }
6083             }
6084             if (!(surface->flags & SFLAG_INSYSMEM))
6085             {
6086                 WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6087                 /* Lets hope we get it from somewhere... */
6088                 surface_load_location(surface, SFLAG_INSYSMEM, rect);
6089             }
6090
6091             if (!device->isInDraw) context = context_acquire(device, NULL);
6092
6093             surface_prepare_texture(surface, gl_info, srgb);
6094             surface_bind_and_dirtify(surface, gl_info, srgb);
6095
6096             if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6097             {
6098                 surface->flags |= SFLAG_GLCKEY;
6099                 surface->glCKey = surface->SrcBltCKey;
6100             }
6101             else surface->flags &= ~SFLAG_GLCKEY;
6102
6103             /* The width is in 'length' not in bytes */
6104             width = surface->resource.width;
6105             pitch = wined3d_surface_get_pitch(surface);
6106
6107             /* Don't use PBOs for converted surfaces. During PBO conversion we look at SFLAG_CONVERTED
6108              * but it isn't set (yet) in all cases it is getting called. */
6109             if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6110             {
6111                 TRACE("Removing the pbo attached to surface %p.\n", surface);
6112                 surface_remove_pbo(surface, gl_info);
6113             }
6114
6115             if (format.convert)
6116             {
6117                 /* This code is entered for texture formats which need a fixup. */
6118                 UINT height = surface->resource.height;
6119
6120                 /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6121                 outpitch = width * format.conv_byte_count;
6122                 outpitch = (outpitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6123
6124                 mem = HeapAlloc(GetProcessHeap(), 0, outpitch * height);
6125                 if(!mem) {
6126                     ERR("Out of memory %d, %d!\n", outpitch, height);
6127                     if (context) context_release(context);
6128                     return WINED3DERR_OUTOFVIDEOMEMORY;
6129                 }
6130                 format.convert(surface->resource.allocatedMemory, mem, pitch, width, height);
6131             }
6132             else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6133             {
6134                 /* This code is only entered for color keying fixups */
6135                 UINT height = surface->resource.height;
6136
6137                 /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6138                 outpitch = width * format.conv_byte_count;
6139                 outpitch = (outpitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6140
6141                 mem = HeapAlloc(GetProcessHeap(), 0, outpitch * height);
6142                 if(!mem) {
6143                     ERR("Out of memory %d, %d!\n", outpitch, height);
6144                     if (context) context_release(context);
6145                     return WINED3DERR_OUTOFVIDEOMEMORY;
6146                 }
6147                 d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, pitch,
6148                         width, height, outpitch, convert, surface);
6149             }
6150             else
6151             {
6152                 mem = surface->resource.allocatedMemory;
6153             }
6154
6155             /* Make sure the correct pitch is used */
6156             ENTER_GL();
6157             glPixelStorei(GL_UNPACK_ROW_LENGTH, width);
6158             LEAVE_GL();
6159
6160             if (mem || (surface->flags & SFLAG_PBO))
6161                 surface_upload_data(surface, gl_info, &format, srgb, mem);
6162
6163             /* Restore the default pitch */
6164             ENTER_GL();
6165             glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
6166             LEAVE_GL();
6167
6168             if (context) context_release(context);
6169
6170             /* Don't delete PBO memory */
6171             if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6172                 HeapFree(GetProcessHeap(), 0, mem);
6173         }
6174     }
6175
6176     if (!rect)
6177     {
6178         surface->flags |= flag;
6179
6180         if (flag != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6181             surface_evict_sysmem(surface);
6182     }
6183
6184     if (in_fbo && (surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)))
6185     {
6186         /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
6187         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
6188     }
6189
6190     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6191             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6192     {
6193         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6194     }
6195
6196     return WINED3D_OK;
6197 }
6198
6199 BOOL surface_is_offscreen(struct wined3d_surface *surface)
6200 {
6201     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6202
6203     /* Not on a swapchain - must be offscreen */
6204     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6205
6206     /* The front buffer is always onscreen */
6207     if (surface == swapchain->front_buffer) return FALSE;
6208
6209     /* If the swapchain is rendered to an FBO, the backbuffer is
6210      * offscreen, otherwise onscreen */
6211     return swapchain->render_to_fbo;
6212 }
6213
6214 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Nothing to release, see ffp_blit_alloc().
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6217
6218 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6219 /* Context activation is done by the caller. */
6220 static void ffp_blit_p8_upload_palette(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6221 {
6222     BYTE table[256][4];
6223     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6224
6225     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6226
6227     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6228     ENTER_GL();
6229     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6230     LEAVE_GL();
6231 }
6232
6233 /* Context activation is done by the caller. */
6234 static HRESULT ffp_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6235 {
6236     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6237
6238     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6239      * else the surface is converted in software at upload time in LoadLocation.
6240      */
6241     if(fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6242         ffp_blit_p8_upload_palette(surface, gl_info);
6243
6244     ENTER_GL();
6245     glEnable(surface->texture_target);
6246     checkGLcall("glEnable(surface->texture_target)");
6247     LEAVE_GL();
6248     return WINED3D_OK;
6249 }
6250
6251 /* Context activation is done by the caller. */
6252 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6253 {
6254     ENTER_GL();
6255     glDisable(GL_TEXTURE_2D);
6256     checkGLcall("glDisable(GL_TEXTURE_2D)");
6257     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6258     {
6259         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6260         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6261     }
6262     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6263     {
6264         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6265         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6266     }
6267     LEAVE_GL();
6268 }
6269
6270 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6271         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6272         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6273 {
6274     enum complex_fixup src_fixup;
6275
6276     switch (blit_op)
6277     {
6278         case WINED3D_BLIT_OP_COLOR_BLIT:
6279             src_fixup = get_complex_fixup(src_format->color_fixup);
6280             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6281             {
6282                 TRACE("Checking support for fixup:\n");
6283                 dump_color_fixup_desc(src_format->color_fixup);
6284             }
6285
6286             if (!is_identity_fixup(dst_format->color_fixup))
6287             {
6288                 TRACE("Destination fixups are not supported\n");
6289                 return FALSE;
6290             }
6291
6292             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6293             {
6294                 TRACE("P8 fixup supported\n");
6295                 return TRUE;
6296             }
6297
6298             /* We only support identity conversions. */
6299             if (is_identity_fixup(src_format->color_fixup))
6300             {
6301                 TRACE("[OK]\n");
6302                 return TRUE;
6303             }
6304
6305             TRACE("[FAILED]\n");
6306             return FALSE;
6307
6308         case WINED3D_BLIT_OP_COLOR_FILL:
6309             if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6310             {
6311                 TRACE("Color fill not supported\n");
6312                 return FALSE;
6313             }
6314
6315             return TRUE;
6316
6317         case WINED3D_BLIT_OP_DEPTH_FILL:
6318             return TRUE;
6319
6320         default:
6321             TRACE("Unsupported blit_op=%d\n", blit_op);
6322             return FALSE;
6323     }
6324 }
6325
6326 /* Do not call while under the GL lock. */
6327 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6328         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6329 {
6330     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6331
6332     return device_clear_render_targets(device, 1, &dst_surface, NULL,
6333             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6334 }
6335
6336 /* Do not call while under the GL lock. */
6337 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6338         struct wined3d_surface *surface, const RECT *rect, float depth)
6339 {
6340     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6341
6342     return device_clear_render_targets(device, 0, NULL, surface,
6343             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6344 }
6345
6346 const struct blit_shader ffp_blit =  {
6347     ffp_blit_alloc,
6348     ffp_blit_free,
6349     ffp_blit_set,
6350     ffp_blit_unset,
6351     ffp_blit_supported,
6352     ffp_blit_color_fill,
6353     ffp_blit_depth_fill,
6354 };
6355
6356 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6357 {
6358     return WINED3D_OK;
6359 }
6360
/* Counterpart of cpu_blit_alloc(); intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Nothing to release. */
}
6365
6366 /* Context activation is done by the caller. */
6367 static HRESULT cpu_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6368 {
6369     return WINED3D_OK;
6370 }
6371
/* Counterpart of cpu_blit_set(); intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* No state was set, so none has to be restored. */
}
6376
6377 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6378         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6379         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6380 {
6381     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6382     {
6383         return TRUE;
6384     }
6385
6386     return FALSE;
6387 }
6388
6389 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6390         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6391         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6392 {
6393     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6394     const struct wined3d_format *src_format, *dst_format;
6395     struct wined3d_surface *orig_src = src_surface;
6396     WINED3DLOCKED_RECT dlock, slock;
6397     HRESULT hr = WINED3D_OK;
6398     const BYTE *sbuf;
6399     RECT xdst,xsrc;
6400     BYTE *dbuf;
6401     int x, y;
6402
6403     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6404             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6405             flags, fx, debug_d3dtexturefiltertype(filter));
6406
6407     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
6408     {
6409         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY\n");
6410         return WINEDDERR_SURFACEBUSY;
6411     }
6412
6413     /* First check for the validity of source / destination rectangles.
6414      * This was verified using a test application and by MSDN. */
6415     if (src_rect)
6416     {
6417         if (src_surface)
6418         {
6419             if (src_rect->right < src_rect->left || src_rect->bottom < src_rect->top
6420                     || src_rect->left > src_surface->resource.width || src_rect->left < 0
6421                     || src_rect->top > src_surface->resource.height || src_rect->top < 0
6422                     || src_rect->right > src_surface->resource.width || src_rect->right < 0
6423                     || src_rect->bottom > src_surface->resource.height || src_rect->bottom < 0)
6424             {
6425                 WARN("Application gave us bad source rectangle for Blt.\n");
6426                 return WINEDDERR_INVALIDRECT;
6427             }
6428
6429             if (!src_rect->right || !src_rect->bottom
6430                     || src_rect->left == (int)src_surface->resource.width
6431                     || src_rect->top == (int)src_surface->resource.height)
6432             {
6433                 TRACE("Nothing to be done.\n");
6434                 return WINED3D_OK;
6435             }
6436         }
6437
6438         xsrc = *src_rect;
6439     }
6440     else if (src_surface)
6441     {
6442         xsrc.left = 0;
6443         xsrc.top = 0;
6444         xsrc.right = src_surface->resource.width;
6445         xsrc.bottom = src_surface->resource.height;
6446     }
6447     else
6448     {
6449         memset(&xsrc, 0, sizeof(xsrc));
6450     }
6451
6452     if (dst_rect)
6453     {
6454         /* For the Destination rect, it can be out of bounds on the condition
6455          * that a clipper is set for the given surface. */
6456         if (!dst_surface->clipper && (dst_rect->right < dst_rect->left || dst_rect->bottom < dst_rect->top
6457                 || dst_rect->left > dst_surface->resource.width || dst_rect->left < 0
6458                 || dst_rect->top > dst_surface->resource.height || dst_rect->top < 0
6459                 || dst_rect->right > dst_surface->resource.width || dst_rect->right < 0
6460                 || dst_rect->bottom > dst_surface->resource.height || dst_rect->bottom < 0))
6461         {
6462             WARN("Application gave us bad destination rectangle for Blt without a clipper set.\n");
6463             return WINEDDERR_INVALIDRECT;
6464         }
6465
6466         if (dst_rect->right <= 0 || dst_rect->bottom <= 0
6467                 || dst_rect->left >= (int)dst_surface->resource.width
6468                 || dst_rect->top >= (int)dst_surface->resource.height)
6469         {
6470             TRACE("Nothing to be done.\n");
6471             return WINED3D_OK;
6472         }
6473
6474         if (!src_surface)
6475         {
6476             RECT full_rect;
6477
6478             full_rect.left = 0;
6479             full_rect.top = 0;
6480             full_rect.right = dst_surface->resource.width;
6481             full_rect.bottom = dst_surface->resource.height;
6482             IntersectRect(&xdst, &full_rect, dst_rect);
6483         }
6484         else
6485         {
6486             BOOL clip_horiz, clip_vert;
6487
6488             xdst = *dst_rect;
6489             clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6490             clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6491
6492             if (clip_vert || clip_horiz)
6493             {
6494                 /* Now check if this is a special case or not... */
6495                 if ((flags & WINEDDBLT_DDFX)
6496                         || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6497                         || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6498                 {
6499                     WARN("Out of screen rectangle in special case. Not handled right now.\n");
6500                     return WINED3D_OK;
6501                 }
6502
6503                 if (clip_horiz)
6504                 {
6505                     if (xdst.left < 0)
6506                     {
6507                         xsrc.left -= xdst.left;
6508                         xdst.left = 0;
6509                     }
6510                     if (xdst.right > dst_surface->resource.width)
6511                     {
6512                         xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6513                         xdst.right = (int)dst_surface->resource.width;
6514                     }
6515                 }
6516
6517                 if (clip_vert)
6518                 {
6519                     if (xdst.top < 0)
6520                     {
6521                         xsrc.top -= xdst.top;
6522                         xdst.top = 0;
6523                     }
6524                     if (xdst.bottom > dst_surface->resource.height)
6525                     {
6526                         xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6527                         xdst.bottom = (int)dst_surface->resource.height;
6528                     }
6529                 }
6530
6531                 /* And check if after clipping something is still to be done... */
6532                 if ((xdst.right <= 0) || (xdst.bottom <= 0)
6533                         || (xdst.left >= (int)dst_surface->resource.width)
6534                         || (xdst.top >= (int)dst_surface->resource.height)
6535                         || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6536                         || (xsrc.left >= (int)src_surface->resource.width)
6537                         || (xsrc.top >= (int)src_surface->resource.height))
6538                 {
6539                     TRACE("Nothing to be done after clipping.\n");
6540                     return WINED3D_OK;
6541                 }
6542             }
6543         }
6544     }
6545     else
6546     {
6547         xdst.left = 0;
6548         xdst.top = 0;
6549         xdst.right = dst_surface->resource.width;
6550         xdst.bottom = dst_surface->resource.height;
6551     }
6552
6553     if (src_surface == dst_surface)
6554     {
6555         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6556         slock = dlock;
6557         src_format = dst_surface->resource.format;
6558         dst_format = src_format;
6559     }
6560     else
6561     {
6562         dst_format = dst_surface->resource.format;
6563         if (src_surface)
6564         {
6565             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6566             {
6567                 src_surface = surface_convert_format(src_surface, dst_format->id);
6568                 if (!src_surface)
6569                 {
6570                     /* The conv function writes a FIXME */
6571                     WARN("Cannot convert source surface format to dest format.\n");
6572                     goto release;
6573                 }
6574             }
6575             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6576             src_format = src_surface->resource.format;
6577         }
6578         else
6579         {
6580             src_format = dst_format;
6581         }
6582         if (dst_rect)
6583             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6584         else
6585             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6586     }
6587
6588     if (!fx || !(fx->dwDDFX)) flags &= ~WINEDDBLT_DDFX;
6589
6590     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_FOURCC)
6591     {
6592         if (!dst_rect || src_surface == dst_surface)
6593         {
6594             memcpy(dlock.pBits, slock.pBits, dst_surface->resource.size);
6595             goto release;
6596         }
6597     }
6598
6599     bpp = dst_surface->resource.format->byte_count;
6600     srcheight = xsrc.bottom - xsrc.top;
6601     srcwidth = xsrc.right - xsrc.left;
6602     dstheight = xdst.bottom - xdst.top;
6603     dstwidth = xdst.right - xdst.left;
6604     width = (xdst.right - xdst.left) * bpp;
6605
6606     if (dst_rect && src_surface != dst_surface)
6607         dbuf = dlock.pBits;
6608     else
6609         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6610
6611     if (flags & WINEDDBLT_WAIT)
6612     {
6613         flags &= ~WINEDDBLT_WAIT;
6614     }
6615     if (flags & WINEDDBLT_ASYNC)
6616     {
6617         static BOOL displayed = FALSE;
6618         if (!displayed)
6619             FIXME("Can't handle WINEDDBLT_ASYNC flag right now.\n");
6620         displayed = TRUE;
6621         flags &= ~WINEDDBLT_ASYNC;
6622     }
6623     if (flags & WINEDDBLT_DONOTWAIT)
6624     {
6625         /* WINEDDBLT_DONOTWAIT appeared in DX7 */
6626         static BOOL displayed = FALSE;
6627         if (!displayed)
6628             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag right now.\n");
6629         displayed = TRUE;
6630         flags &= ~WINEDDBLT_DONOTWAIT;
6631     }
6632
6633     /* First, all the 'source-less' blits */
6634     if (flags & WINEDDBLT_COLORFILL)
6635     {
6636         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6637         flags &= ~WINEDDBLT_COLORFILL;
6638     }
6639
6640     if (flags & WINEDDBLT_DEPTHFILL)
6641     {
6642         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6643     }
6644     if (flags & WINEDDBLT_ROP)
6645     {
6646         /* Catch some degenerate cases here. */
6647         switch (fx->dwROP)
6648         {
6649             case BLACKNESS:
6650                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6651                 break;
6652             case 0xAA0029: /* No-op */
6653                 break;
6654             case WHITENESS:
6655                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6656                 break;
6657             case SRCCOPY: /* Well, we do that below? */
6658                 break;
6659             default:
6660                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6661                 goto error;
6662         }
6663         flags &= ~WINEDDBLT_ROP;
6664     }
6665     if (flags & WINEDDBLT_DDROPS)
6666     {
6667         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6668     }
6669     /* Now the 'with source' blits. */
6670     if (src_surface)
6671     {
6672         const BYTE *sbase;
6673         int sx, xinc, sy, yinc;
6674
6675         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6676             goto release;
6677
6678         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6679                 && (srcwidth != dstwidth || srcheight != dstheight))
6680         {
6681             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6682             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6683         }
6684
6685         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6686         xinc = (srcwidth << 16) / dstwidth;
6687         yinc = (srcheight << 16) / dstheight;
6688
6689         if (!flags)
6690         {
6691             /* No effects, we can cheat here. */
6692             if (dstwidth == srcwidth)
6693             {
6694                 if (dstheight == srcheight)
6695                 {
6696                     /* No stretching in either direction. This needs to be as
6697                      * fast as possible. */
6698                     sbuf = sbase;
6699
6700                     /* Check for overlapping surfaces. */
6701                     if (src_surface != dst_surface || xdst.top < xsrc.top
6702                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6703                     {
6704                         /* No overlap, or dst above src, so copy from top downwards. */
6705                         for (y = 0; y < dstheight; ++y)
6706                         {
6707                             memcpy(dbuf, sbuf, width);
6708                             sbuf += slock.Pitch;
6709                             dbuf += dlock.Pitch;
6710                         }
6711                     }
6712                     else if (xdst.top > xsrc.top)
6713                     {
6714                         /* Copy from bottom upwards. */
6715                         sbuf += (slock.Pitch*dstheight);
6716                         dbuf += (dlock.Pitch*dstheight);
6717                         for (y = 0; y < dstheight; ++y)
6718                         {
6719                             sbuf -= slock.Pitch;
6720                             dbuf -= dlock.Pitch;
6721                             memcpy(dbuf, sbuf, width);
6722                         }
6723                     }
6724                     else
6725                     {
6726                         /* Src and dst overlapping on the same line, use memmove. */
6727                         for (y = 0; y < dstheight; ++y)
6728                         {
6729                             memmove(dbuf, sbuf, width);
6730                             sbuf += slock.Pitch;
6731                             dbuf += dlock.Pitch;
6732                         }
6733                     }
6734                 }
6735                 else
6736                 {
6737                     /* Stretching in y direction only. */
6738                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6739                     {
6740                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6741                         memcpy(dbuf, sbuf, width);
6742                         dbuf += dlock.Pitch;
6743                     }
6744                 }
6745             }
6746             else
6747             {
6748                 /* Stretching in X direction. */
6749                 int last_sy = -1;
6750                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6751                 {
6752                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6753
6754                     if ((sy >> 16) == (last_sy >> 16))
6755                     {
6756                         /* This source row is the same as last source row -
6757                          * Copy the already stretched row. */
6758                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6759                     }
6760                     else
6761                     {
6762 #define STRETCH_ROW(type) \
6763 do { \
6764     const type *s = (const type *)sbuf; \
6765     type *d = (type *)dbuf; \
6766     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6767         d[x] = s[sx >> 16]; \
6768 } while(0)
6769
6770                         switch(bpp)
6771                         {
6772                             case 1:
6773                                 STRETCH_ROW(BYTE);
6774                                 break;
6775                             case 2:
6776                                 STRETCH_ROW(WORD);
6777                                 break;
6778                             case 4:
6779                                 STRETCH_ROW(DWORD);
6780                                 break;
6781                             case 3:
6782                             {
6783                                 const BYTE *s;
6784                                 BYTE *d = dbuf;
6785                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6786                                 {
6787                                     DWORD pixel;
6788
6789                                     s = sbuf + 3 * (sx >> 16);
6790                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6791                                     d[0] = (pixel      ) & 0xff;
6792                                     d[1] = (pixel >>  8) & 0xff;
6793                                     d[2] = (pixel >> 16) & 0xff;
6794                                     d += 3;
6795                                 }
6796                                 break;
6797                             }
6798                             default:
6799                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6800                                 hr = WINED3DERR_NOTAVAILABLE;
6801                                 goto error;
6802                         }
6803 #undef STRETCH_ROW
6804                     }
6805                     dbuf += dlock.Pitch;
6806                     last_sy = sy;
6807                 }
6808             }
6809         }
6810         else
6811         {
6812             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6813             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6814             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6815             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6816             {
6817                 /* The color keying flags are checked for correctness in ddraw */
6818                 if (flags & WINEDDBLT_KEYSRC)
6819                 {
6820                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6821                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6822                 }
6823                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6824                 {
6825                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6826                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6827                 }
6828
6829                 if (flags & WINEDDBLT_KEYDEST)
6830                 {
6831                     /* Destination color keys are taken from the source surface! */
6832                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6833                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6834                 }
6835                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6836                 {
6837                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6838                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6839                 }
6840
6841                 if (bpp == 1)
6842                 {
6843                     keymask = 0xff;
6844                 }
6845                 else
6846                 {
6847                     keymask = src_format->red_mask
6848                             | src_format->green_mask
6849                             | src_format->blue_mask;
6850                 }
6851                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6852             }
6853
6854             if (flags & WINEDDBLT_DDFX)
6855             {
6856                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6857                 LONG tmpxy;
6858                 dTopLeft     = dbuf;
6859                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6860                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6861                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6862
6863                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6864                 {
6865                     /* I don't think we need to do anything about this flag */
6866                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6867                 }
6868                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6869                 {
6870                     tmp          = dTopRight;
6871                     dTopRight    = dTopLeft;
6872                     dTopLeft     = tmp;
6873                     tmp          = dBottomRight;
6874                     dBottomRight = dBottomLeft;
6875                     dBottomLeft  = tmp;
6876                     dstxinc = dstxinc * -1;
6877                 }
6878                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6879                 {
6880                     tmp          = dTopLeft;
6881                     dTopLeft     = dBottomLeft;
6882                     dBottomLeft  = tmp;
6883                     tmp          = dTopRight;
6884                     dTopRight    = dBottomRight;
6885                     dBottomRight = tmp;
6886                     dstyinc = dstyinc * -1;
6887                 }
6888                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6889                 {
6890                     /* I don't think we need to do anything about this flag */
6891                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6892                 }
6893                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6894                 {
6895                     tmp          = dBottomRight;
6896                     dBottomRight = dTopLeft;
6897                     dTopLeft     = tmp;
6898                     tmp          = dBottomLeft;
6899                     dBottomLeft  = dTopRight;
6900                     dTopRight    = tmp;
6901                     dstxinc = dstxinc * -1;
6902                     dstyinc = dstyinc * -1;
6903                 }
6904                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6905                 {
6906                     tmp          = dTopLeft;
6907                     dTopLeft     = dBottomLeft;
6908                     dBottomLeft  = dBottomRight;
6909                     dBottomRight = dTopRight;
6910                     dTopRight    = tmp;
6911                     tmpxy   = dstxinc;
6912                     dstxinc = dstyinc;
6913                     dstyinc = tmpxy;
6914                     dstxinc = dstxinc * -1;
6915                 }
6916                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6917                 {
6918                     tmp          = dTopLeft;
6919                     dTopLeft     = dTopRight;
6920                     dTopRight    = dBottomRight;
6921                     dBottomRight = dBottomLeft;
6922                     dBottomLeft  = tmp;
6923                     tmpxy   = dstxinc;
6924                     dstxinc = dstyinc;
6925                     dstyinc = tmpxy;
6926                     dstyinc = dstyinc * -1;
6927                 }
6928                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6929                 {
6930                     /* I don't think we need to do anything about this flag */
6931                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6932                 }
6933                 dbuf = dTopLeft;
6934                 flags &= ~(WINEDDBLT_DDFX);
6935             }
6936
6937 #define COPY_COLORKEY_FX(type) \
6938 do { \
6939     const type *s; \
6940     type *d = (type *)dbuf, *dx, tmp; \
6941     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6942     { \
6943         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6944         dx = d; \
6945         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6946         { \
6947             tmp = s[sx >> 16]; \
6948             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6949                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6950             { \
6951                 dx[0] = tmp; \
6952             } \
6953             dx = (type *)(((BYTE *)dx) + dstxinc); \
6954         } \
6955         d = (type *)(((BYTE *)d) + dstyinc); \
6956     } \
6957 } while(0)
6958
6959             switch (bpp)
6960             {
6961                 case 1:
6962                     COPY_COLORKEY_FX(BYTE);
6963                     break;
6964                 case 2:
6965                     COPY_COLORKEY_FX(WORD);
6966                     break;
6967                 case 4:
6968                     COPY_COLORKEY_FX(DWORD);
6969                     break;
6970                 case 3:
6971                 {
6972                     const BYTE *s;
6973                     BYTE *d = dbuf, *dx;
6974                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6975                     {
6976                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6977                         dx = d;
6978                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
6979                         {
6980                             DWORD pixel, dpixel = 0;
6981                             s = sbuf + 3 * (sx>>16);
6982                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6983                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
6984                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
6985                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
6986                             {
6987                                 dx[0] = (pixel      ) & 0xff;
6988                                 dx[1] = (pixel >>  8) & 0xff;
6989                                 dx[2] = (pixel >> 16) & 0xff;
6990                             }
6991                             dx += dstxinc;
6992                         }
6993                         d += dstyinc;
6994                     }
6995                     break;
6996                 }
6997                 default:
6998                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
6999                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7000                     hr = WINED3DERR_NOTAVAILABLE;
7001                     goto error;
7002 #undef COPY_COLORKEY_FX
7003             }
7004         }
7005     }
7006
7007 error:
7008     if (flags && FIXME_ON(d3d_surface))
7009     {
7010         FIXME("\tUnsupported flags: %#x.\n", flags);
7011     }
7012
7013 release:
7014     wined3d_surface_unmap(dst_surface);
7015     if (src_surface && src_surface != dst_surface)
7016         wined3d_surface_unmap(src_surface);
7017     /* Release the converted surface, if any. */
7018     if (src_surface && src_surface != orig_src)
7019         wined3d_surface_decref(src_surface);
7020
7021     return hr;
7022 }
7023
7024 static HRESULT surface_cpu_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
7025         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD trans)
7026 {
7027     const struct wined3d_format *src_format, *dst_format;
7028     RECT lock_src, lock_dst, lock_union;
7029     WINED3DLOCKED_RECT dlock, slock;
7030     HRESULT hr = WINED3D_OK;
7031     int bpp, w, h, x, y;
7032     const BYTE *sbuf;
7033     BYTE *dbuf;
7034     RECT rsrc2;
7035
7036     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
7037             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
7038
7039     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface->flags & SFLAG_LOCKED))
7040     {
7041         WARN(" Surface is busy, returning DDERR_SURFACEBUSY\n");
7042         return WINEDDERR_SURFACEBUSY;
7043     }
7044
7045     if (!src_rect)
7046     {
7047         WARN("src_rect is NULL!\n");
7048         rsrc2.left = 0;
7049         rsrc2.top = 0;
7050         rsrc2.right = src_surface->resource.width;
7051         rsrc2.bottom = src_surface->resource.height;
7052         src_rect = &rsrc2;
7053     }
7054
7055     /* Check source rect for validity. Copied from normal Blt. Fixes Baldur's Gate. */
7056     if ((src_rect->bottom > src_surface->resource.height) || (src_rect->bottom < 0)
7057             || (src_rect->top > src_surface->resource.height) || (src_rect->top < 0)
7058             || (src_rect->left > src_surface->resource.width) || (src_rect->left < 0)
7059             || (src_rect->right > src_surface->resource.width) || (src_rect->right < 0)
7060             || (src_rect->right < src_rect->left) || (src_rect->bottom < src_rect->top))
7061     {
7062         WARN("Application gave us bad source rectangle for BltFast.\n");
7063         return WINEDDERR_INVALIDRECT;
7064     }
7065
7066     h = src_rect->bottom - src_rect->top;
7067     if (h > dst_surface->resource.height - dst_y)
7068         h = dst_surface->resource.height - dst_y;
7069     if (h > src_surface->resource.height - src_rect->top)
7070         h = src_surface->resource.height - src_rect->top;
7071     if (h <= 0)
7072         return WINEDDERR_INVALIDRECT;
7073
7074     w = src_rect->right - src_rect->left;
7075     if (w > dst_surface->resource.width - dst_x)
7076         w = dst_surface->resource.width - dst_x;
7077     if (w > src_surface->resource.width - src_rect->left)
7078         w = src_surface->resource.width - src_rect->left;
7079     if (w <= 0)
7080         return WINEDDERR_INVALIDRECT;
7081
7082     /* Now compute the locking rectangle... */
7083     lock_src.left = src_rect->left;
7084     lock_src.top = src_rect->top;
7085     lock_src.right = lock_src.left + w;
7086     lock_src.bottom = lock_src.top + h;
7087
7088     lock_dst.left = dst_x;
7089     lock_dst.top = dst_y;
7090     lock_dst.right = dst_x + w;
7091     lock_dst.bottom = dst_y + h;
7092
7093     bpp = dst_surface->resource.format->byte_count;
7094
7095     /* We need to lock the surfaces, or we won't get refreshes when done. */
7096     if (src_surface == dst_surface)
7097     {
7098         int pitch;
7099
7100         UnionRect(&lock_union, &lock_src, &lock_dst);
7101
7102         /* Lock the union of the two rectangles. */
7103         hr = wined3d_surface_map(dst_surface, &dlock, &lock_union, 0);
7104         if (FAILED(hr))
7105             goto error;
7106
7107         pitch = dlock.Pitch;
7108         slock.Pitch = dlock.Pitch;
7109
7110         /* Since slock was originally copied from this surface's description, we can just reuse it. */
7111         sbuf = dst_surface->resource.allocatedMemory + lock_src.top * pitch + lock_src.left * bpp;
7112         dbuf = dst_surface->resource.allocatedMemory + lock_dst.top * pitch + lock_dst.left * bpp;
7113         src_format = src_surface->resource.format;
7114         dst_format = src_format;
7115     }
7116     else
7117     {
7118         hr = wined3d_surface_map(src_surface, &slock, &lock_src, WINED3DLOCK_READONLY);
7119         if (FAILED(hr))
7120             goto error;
7121         hr = wined3d_surface_map(dst_surface, &dlock, &lock_dst, 0);
7122         if (FAILED(hr))
7123             goto error;
7124
7125         sbuf = slock.pBits;
7126         dbuf = dlock.pBits;
7127         TRACE("Dst is at %p, Src is at %p.\n", dbuf, sbuf);
7128
7129         src_format = src_surface->resource.format;
7130         dst_format = dst_surface->resource.format;
7131     }
7132
7133     /* Handle compressed surfaces first... */
7134     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
7135     {
7136         UINT row_block_count;
7137
7138         TRACE("compressed -> compressed copy\n");
7139         if (trans)
7140             FIXME("trans arg not supported when a compressed surface is involved\n");
7141         if (dst_x || dst_y)
7142             FIXME("offset for destination surface is not supported\n");
7143         if (src_surface->resource.format->id != dst_surface->resource.format->id)
7144         {
7145             FIXME("compressed -> compressed copy only supported for the same type of surface\n");
7146             hr = WINED3DERR_WRONGTEXTUREFORMAT;
7147             goto error;
7148         }
7149
7150         row_block_count = (w + dst_format->block_width - 1) / dst_format->block_width;
7151         for (y = 0; y < h; y += dst_format->block_height)
7152         {
7153             memcpy(dbuf, sbuf, row_block_count * dst_format->block_byte_count);
7154             dbuf += dlock.Pitch;
7155             sbuf += slock.Pitch;
7156         }
7157
7158         goto error;
7159     }
7160     if ((src_format->flags & WINED3DFMT_FLAG_COMPRESSED) && !(dst_format->flags & WINED3DFMT_FLAG_COMPRESSED))
7161     {
7162         /* TODO: Use the libtxc_dxtn.so shared library to do software
7163          * decompression. */
7164         ERR("Software decompression not supported.\n");
7165         goto error;
7166     }
7167
7168     if (trans & (WINEDDBLTFAST_SRCCOLORKEY | WINEDDBLTFAST_DESTCOLORKEY))
7169     {
7170         DWORD keylow, keyhigh;
7171         DWORD mask = src_surface->resource.format->red_mask
7172                 | src_surface->resource.format->green_mask
7173                 | src_surface->resource.format->blue_mask;
7174
7175         /* For some 8-bit formats like L8 and P8 color masks don't make sense */
7176         if (!mask && bpp == 1)
7177             mask = 0xff;
7178
7179         TRACE("Color keyed copy.\n");
7180         if (trans & WINEDDBLTFAST_SRCCOLORKEY)
7181         {
7182             keylow = src_surface->SrcBltCKey.dwColorSpaceLowValue;
7183             keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
7184         }
7185         else
7186         {
7187             /* I'm not sure if this is correct. */
7188             FIXME("WINEDDBLTFAST_DESTCOLORKEY not fully supported yet.\n");
7189             keylow = dst_surface->DestBltCKey.dwColorSpaceLowValue;
7190             keyhigh = dst_surface->DestBltCKey.dwColorSpaceHighValue;
7191         }
7192
7193 #define COPYBOX_COLORKEY(type) \
7194 do { \
7195     const type *s = (const type *)sbuf; \
7196     type *d = (type *)dbuf; \
7197     type tmp; \
7198     for (y = 0; y < h; y++) \
7199     { \
7200         for (x = 0; x < w; x++) \
7201         { \
7202             tmp = s[x]; \
7203             if ((tmp & mask) < keylow || (tmp & mask) > keyhigh) d[x] = tmp; \
7204         } \
7205         s = (const type *)((const BYTE *)s + slock.Pitch); \
7206         d = (type *)((BYTE *)d + dlock.Pitch); \
7207     } \
7208 } while(0)
7209
7210         switch (bpp)
7211         {
7212             case 1:
7213                 COPYBOX_COLORKEY(BYTE);
7214                 break;
7215             case 2:
7216                 COPYBOX_COLORKEY(WORD);
7217                 break;
7218             case 4:
7219                 COPYBOX_COLORKEY(DWORD);
7220                 break;
7221             case 3:
7222             {
7223                 const BYTE *s;
7224                 DWORD tmp;
7225                 BYTE *d;
7226                 s = sbuf;
7227                 d = dbuf;
7228                 for (y = 0; y < h; ++y)
7229                 {
7230                     for (x = 0; x < w * 3; x += 3)
7231                     {
7232                         tmp = (DWORD)s[x] + ((DWORD)s[x + 1] << 8) + ((DWORD)s[x + 2] << 16);
7233                         if (tmp < keylow || tmp > keyhigh)
7234                         {
7235                             d[x + 0] = s[x + 0];
7236                             d[x + 1] = s[x + 1];
7237                             d[x + 2] = s[x + 2];
7238                         }
7239                     }
7240                     s += slock.Pitch;
7241                     d += dlock.Pitch;
7242                 }
7243                 break;
7244             }
7245             default:
7246                 FIXME("Source color key blitting not supported for bpp %u.\n", bpp * 8);
7247                 hr = WINED3DERR_NOTAVAILABLE;
7248                 goto error;
7249         }
7250 #undef COPYBOX_COLORKEY
7251         TRACE("Copy done.\n");
7252     }
7253     else
7254     {
7255         int width = w * bpp;
7256         INT sbufpitch, dbufpitch;
7257
7258         TRACE("No color key copy.\n");
7259         /* Handle overlapping surfaces. */
7260         if (sbuf < dbuf)
7261         {
7262             sbuf += (h - 1) * slock.Pitch;
7263             dbuf += (h - 1) * dlock.Pitch;
7264             sbufpitch = -slock.Pitch;
7265             dbufpitch = -dlock.Pitch;
7266         }
7267         else
7268         {
7269             sbufpitch = slock.Pitch;
7270             dbufpitch = dlock.Pitch;
7271         }
7272         for (y = 0; y < h; ++y)
7273         {
7274             /* This is pretty easy, a line for line memcpy. */
7275             memmove(dbuf, sbuf, width);
7276             sbuf += sbufpitch;
7277             dbuf += dbufpitch;
7278         }
7279         TRACE("Copy done.\n");
7280     }
7281
7282 error:
7283     if (src_surface == dst_surface)
7284     {
7285         wined3d_surface_unmap(dst_surface);
7286     }
7287     else
7288     {
7289         wined3d_surface_unmap(dst_surface);
7290         wined3d_surface_unmap(src_surface);
7291     }
7292
7293     return hr;
7294 }
7295
7296 /* Do not call while under the GL lock. */
7297 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7298         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7299 {
7300     WINEDDBLTFX BltFx;
7301
7302     memset(&BltFx, 0, sizeof(BltFx));
7303     BltFx.dwSize = sizeof(BltFx);
7304     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface->resource.format, color);
7305     return wined3d_surface_blt(dst_surface, dst_rect, NULL, NULL,
7306             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7307 }
7308
7309 /* Do not call while under the GL lock. */
7310 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7311         struct wined3d_surface *surface, const RECT *rect, float depth)
7312 {
7313     FIXME("Depth filling not implemented by cpu_blit.\n");
7314     return WINED3DERR_INVALIDCALL;
7315 }
7316
7317 const struct blit_shader cpu_blit =  {
7318     cpu_blit_alloc,
7319     cpu_blit_free,
7320     cpu_blit_set,
7321     cpu_blit_unset,
7322     cpu_blit_supported,
7323     cpu_blit_color_fill,
7324     cpu_blit_depth_fill,
7325 };
7326
7327 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7328         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7329         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7330         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7331 {
7332     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7333     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7334     unsigned int resource_size;
7335     HRESULT hr;
7336
7337     if (multisample_quality > 0)
7338     {
7339         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7340         multisample_quality = 0;
7341     }
7342
7343     /* Quick lockable sanity check.
7344      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7345      * this function is too deep to need to care about things like this.
7346      * Levels need to be checked too, since they all affect what can be done. */
7347     switch (pool)
7348     {
7349         case WINED3DPOOL_SCRATCH:
7350             if (!lockable)
7351             {
7352                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7353                         "which are mutually exclusive, setting lockable to TRUE.\n");
7354                 lockable = TRUE;
7355             }
7356             break;
7357
7358         case WINED3DPOOL_SYSTEMMEM:
7359             if (!lockable)
7360                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7361             break;
7362
7363         case WINED3DPOOL_MANAGED:
7364             if (usage & WINED3DUSAGE_DYNAMIC)
7365                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7366             break;
7367
7368         case WINED3DPOOL_DEFAULT:
7369             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7370                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7371             break;
7372
7373         default:
7374             FIXME("Unknown pool %#x.\n", pool);
7375             break;
7376     };
7377
7378     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7379         FIXME("Trying to create a render target that isn't in the default pool.\n");
7380
7381     /* FIXME: Check that the format is supported by the device. */
7382
7383     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7384     if (!resource_size)
7385         return WINED3DERR_INVALIDCALL;
7386
7387     surface->surface_type = surface_type;
7388
7389     switch (surface_type)
7390     {
7391         case SURFACE_OPENGL:
7392             surface->surface_ops = &surface_ops;
7393             break;
7394
7395         case SURFACE_GDI:
7396             surface->surface_ops = &gdi_surface_ops;
7397             break;
7398
7399         default:
7400             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7401             return WINED3DERR_INVALIDCALL;
7402     }
7403
7404     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7405             multisample_type, multisample_quality, usage, pool, width, height, 1,
7406             resource_size, parent, parent_ops, &surface_resource_ops);
7407     if (FAILED(hr))
7408     {
7409         WARN("Failed to initialize resource, returning %#x.\n", hr);
7410         return hr;
7411     }
7412
7413     /* "Standalone" surface. */
7414     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7415
7416     surface->texture_level = level;
7417     list_init(&surface->overlays);
7418
7419     /* Flags */
7420     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7421     if (discard)
7422         surface->flags |= SFLAG_DISCARD;
7423     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7424         surface->flags |= SFLAG_LOCKABLE;
7425     /* I'm not sure if this qualifies as a hack or as an optimization. It
7426      * seems reasonable to assume that lockable render targets will get
7427      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7428      * creation. However, the other reason we want to do this is that several
7429      * ddraw applications access surface memory while the surface isn't
7430      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7431      * future locks prevents these from crashing. */
7432     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7433         surface->flags |= SFLAG_DYNLOCK;
7434
7435     /* Mark the texture as dirty so that it gets loaded first time around. */
7436     surface_add_dirty_rect(surface, NULL);
7437     list_init(&surface->renderbuffers);
7438
7439     TRACE("surface %p, memory %p, size %u\n",
7440             surface, surface->resource.allocatedMemory, surface->resource.size);
7441
7442     /* Call the private setup routine */
7443     hr = surface->surface_ops->surface_private_setup(surface);
7444     if (FAILED(hr))
7445     {
7446         ERR("Private setup failed, returning %#x\n", hr);
7447         surface->surface_ops->surface_cleanup(surface);
7448         return hr;
7449     }
7450
7451     return hr;
7452 }
7453
7454 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7455         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7456         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7457         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7458 {
7459     struct wined3d_surface *object;
7460     HRESULT hr;
7461
7462     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7463             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7464     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7465             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7466     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7467
7468     if (surface_type == SURFACE_OPENGL && !device->adapter)
7469     {
7470         ERR("OpenGL surfaces are not available without OpenGL.\n");
7471         return WINED3DERR_NOTAVAILABLE;
7472     }
7473
7474     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7475     if (!object)
7476     {
7477         ERR("Failed to allocate surface memory.\n");
7478         return WINED3DERR_OUTOFVIDEOMEMORY;
7479     }
7480
7481     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7482             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7483     if (FAILED(hr))
7484     {
7485         WARN("Failed to initialize surface, returning %#x.\n", hr);
7486         HeapFree(GetProcessHeap(), 0, object);
7487         return hr;
7488     }
7489
7490     TRACE("Created surface %p.\n", object);
7491     *surface = object;
7492
7493     return hr;
7494 }