wineps.drv: Ignore requested resolutions not supported by device.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         enum wined3d_texture_filter_type filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         if (surface->texture_name)
61         {
62             TRACE("Deleting texture %u.\n", surface->texture_name);
63             gl_info->gl_ops.gl.p_glDeleteTextures(1, &surface->texture_name);
64         }
65
66         if (surface->flags & SFLAG_PBO)
67         {
68             TRACE("Deleting PBO %u.\n", surface->pbo);
69             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
70         }
71
72         if (surface->rb_multisample)
73         {
74             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
75             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
76         }
77
78         if (surface->rb_resolved)
79         {
80             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
81             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
82         }
83
84         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
85         {
86             TRACE("Deleting renderbuffer %u.\n", entry->id);
87             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
88             HeapFree(GetProcessHeap(), 0, entry);
89         }
90
91         context_release(context);
92     }
93
94     if (surface->flags & SFLAG_DIBSECTION)
95     {
96         DeleteDC(surface->hDC);
97         DeleteObject(surface->dib.DIBsection);
98         surface->dib.bitmap_data = NULL;
99         surface->resource.allocatedMemory = NULL;
100     }
101
102     if (surface->flags & SFLAG_USERPTR)
103         wined3d_surface_set_mem(surface, NULL);
104     if (surface->overlay_dest)
105         list_remove(&surface->overlay_entry);
106
107     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
108     {
109         list_remove(&overlay->overlay_entry);
110         overlay->overlay_dest = NULL;
111     }
112
113     resource_cleanup(&surface->resource);
114 }
115
116 void surface_update_draw_binding(struct wined3d_surface *surface)
117 {
118     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
119         surface->draw_binding = SFLAG_INDRAWABLE;
120     else if (surface->resource.multisample_type)
121         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
122     else
123         surface->draw_binding = SFLAG_INTEXTURE;
124 }
125
126 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
127 {
128     TRACE("surface %p, container %p.\n", surface, container);
129
130     if (!container && type != WINED3D_CONTAINER_NONE)
131         ERR("Setting NULL container of type %#x.\n", type);
132
133     if (type == WINED3D_CONTAINER_SWAPCHAIN)
134     {
135         surface->get_drawable_size = get_drawable_size_swapchain;
136     }
137     else
138     {
139         switch (wined3d_settings.offscreen_rendering_mode)
140         {
141             case ORM_FBO:
142                 surface->get_drawable_size = get_drawable_size_fbo;
143                 break;
144
145             case ORM_BACKBUFFER:
146                 surface->get_drawable_size = get_drawable_size_backbuffer;
147                 break;
148
149             default:
150                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
151                 return;
152         }
153     }
154
155     surface->container.type = type;
156     surface->container.u.base = container;
157     surface_update_draw_binding(surface);
158 }
159
160 struct blt_info
161 {
162     GLenum binding;
163     GLenum bind_target;
164     enum tex_types tex_type;
165     GLfloat coords[4][3];
166 };
167
/* Rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
175
176 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
177 {
178     f->l = ((r->left * 2.0f) / w) - 1.0f;
179     f->t = ((r->top * 2.0f) / h) - 1.0f;
180     f->r = ((r->right * 2.0f) / w) - 1.0f;
181     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
182 }
183
184 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
185 {
186     GLfloat (*coords)[3] = info->coords;
187     struct float_rect f;
188
189     switch (target)
190     {
191         default:
192             FIXME("Unsupported texture target %#x\n", target);
193             /* Fall back to GL_TEXTURE_2D */
194         case GL_TEXTURE_2D:
195             info->binding = GL_TEXTURE_BINDING_2D;
196             info->bind_target = GL_TEXTURE_2D;
197             info->tex_type = tex_2d;
198             coords[0][0] = (float)rect->left / w;
199             coords[0][1] = (float)rect->top / h;
200             coords[0][2] = 0.0f;
201
202             coords[1][0] = (float)rect->right / w;
203             coords[1][1] = (float)rect->top / h;
204             coords[1][2] = 0.0f;
205
206             coords[2][0] = (float)rect->left / w;
207             coords[2][1] = (float)rect->bottom / h;
208             coords[2][2] = 0.0f;
209
210             coords[3][0] = (float)rect->right / w;
211             coords[3][1] = (float)rect->bottom / h;
212             coords[3][2] = 0.0f;
213             break;
214
215         case GL_TEXTURE_RECTANGLE_ARB:
216             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
217             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
218             info->tex_type = tex_rect;
219             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
220             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
221             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
222             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
223             break;
224
225         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
226             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
227             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
228             info->tex_type = tex_cube;
229             cube_coords_float(rect, w, h, &f);
230
231             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
232             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
233             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
234             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
235             break;
236
237         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
238             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
239             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
240             info->tex_type = tex_cube;
241             cube_coords_float(rect, w, h, &f);
242
243             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
244             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
245             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
246             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
247             break;
248
249         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
250             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
251             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
252             info->tex_type = tex_cube;
253             cube_coords_float(rect, w, h, &f);
254
255             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
256             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
257             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
258             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
259             break;
260
261         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
262             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
263             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
264             info->tex_type = tex_cube;
265             cube_coords_float(rect, w, h, &f);
266
267             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
268             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
269             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
270             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
271             break;
272
273         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
274             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
275             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
276             info->tex_type = tex_cube;
277             cube_coords_float(rect, w, h, &f);
278
279             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
280             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
281             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
282             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
283             break;
284
285         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
286             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
287             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
288             info->tex_type = tex_cube;
289             cube_coords_float(rect, w, h, &f);
290
291             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
292             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
293             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
294             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
295             break;
296     }
297 }
298
299 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
300 {
301     if (rect_in)
302         *rect_out = *rect_in;
303     else
304     {
305         rect_out->left = 0;
306         rect_out->top = 0;
307         rect_out->right = surface->resource.width;
308         rect_out->bottom = surface->resource.height;
309     }
310 }
311
312 /* Context activation is done by the caller. */
313 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
314         const RECT *src_rect, const RECT *dst_rect, enum wined3d_texture_filter_type filter)
315 {
316     const struct wined3d_gl_info *gl_info = context->gl_info;
317     struct blt_info info;
318
319     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
320
321     gl_info->gl_ops.gl.p_glEnable(info.bind_target);
322     checkGLcall("glEnable(bind_target)");
323
324     context_bind_texture(context, info.bind_target, src_surface->texture_name);
325
326     /* Filtering for StretchRect */
327     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
328             wined3d_gl_mag_filter(magLookup, filter));
329     checkGLcall("glTexParameteri");
330     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
331             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
332     checkGLcall("glTexParameteri");
333     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
334     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
335     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
336         gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
337     gl_info->gl_ops.gl.p_glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
338     checkGLcall("glTexEnvi");
339
340     /* Draw a quad */
341     gl_info->gl_ops.gl.p_glBegin(GL_TRIANGLE_STRIP);
342     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[0]);
343     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->left, dst_rect->top);
344
345     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[1]);
346     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->right, dst_rect->top);
347
348     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[2]);
349     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->left, dst_rect->bottom);
350
351     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[3]);
352     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->right, dst_rect->bottom);
353     gl_info->gl_ops.gl.p_glEnd();
354
355     /* Unbind the texture */
356     context_bind_texture(context, info.bind_target, 0);
357
358     /* We changed the filtering settings on the texture. Inform the
359      * container about this to get the filters reset properly next draw. */
360     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
361     {
362         struct wined3d_texture *texture = src_surface->container.u.texture;
363         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3D_TEXF_POINT;
364         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3D_TEXF_POINT;
365         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3D_TEXF_NONE;
366         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
367     }
368 }
369
370 /* Works correctly only for <= 4 bpp formats. */
371 static void get_color_masks(const struct wined3d_format *format, DWORD *masks)
372 {
373     masks[0] = ((1 << format->red_size) - 1) << format->red_offset;
374     masks[1] = ((1 << format->green_size) - 1) << format->green_offset;
375     masks[2] = ((1 << format->blue_size) - 1) << format->blue_offset;
376 }
377
378 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
379 {
380     const struct wined3d_format *format = surface->resource.format;
381     SYSTEM_INFO sysInfo;
382     BITMAPINFO *b_info;
383     int extraline = 0;
384     DWORD *masks;
385
386     TRACE("surface %p.\n", surface);
387
388     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
389     {
390         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
391         return WINED3DERR_INVALIDCALL;
392     }
393
394     switch (format->byte_count)
395     {
396         case 2:
397         case 4:
398             /* Allocate extra space to store the RGB bit masks. */
399             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
400             break;
401
402         case 3:
403             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
404             break;
405
406         default:
407             /* Allocate extra space for a palette. */
408             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
409                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
410             break;
411     }
412
413     if (!b_info)
414         return E_OUTOFMEMORY;
415
416     /* Some applications access the surface in via DWORDs, and do not take
417      * the necessary care at the end of the surface. So we need at least
418      * 4 extra bytes at the end of the surface. Check against the page size,
419      * if the last page used for the surface has at least 4 spare bytes we're
420      * safe, otherwise add an extra line to the DIB section. */
421     GetSystemInfo(&sysInfo);
422     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
423     {
424         extraline = 1;
425         TRACE("Adding an extra line to the DIB section.\n");
426     }
427
428     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
429     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
430     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
431     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
432     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
433             * wined3d_surface_get_pitch(surface);
434     b_info->bmiHeader.biPlanes = 1;
435     b_info->bmiHeader.biBitCount = format->byte_count * 8;
436
437     b_info->bmiHeader.biXPelsPerMeter = 0;
438     b_info->bmiHeader.biYPelsPerMeter = 0;
439     b_info->bmiHeader.biClrUsed = 0;
440     b_info->bmiHeader.biClrImportant = 0;
441
442     /* Get the bit masks */
443     masks = (DWORD *)b_info->bmiColors;
444     switch (surface->resource.format->id)
445     {
446         case WINED3DFMT_B8G8R8_UNORM:
447             b_info->bmiHeader.biCompression = BI_RGB;
448             break;
449
450         case WINED3DFMT_B5G5R5X1_UNORM:
451         case WINED3DFMT_B5G5R5A1_UNORM:
452         case WINED3DFMT_B4G4R4A4_UNORM:
453         case WINED3DFMT_B4G4R4X4_UNORM:
454         case WINED3DFMT_B2G3R3_UNORM:
455         case WINED3DFMT_B2G3R3A8_UNORM:
456         case WINED3DFMT_R10G10B10A2_UNORM:
457         case WINED3DFMT_R8G8B8A8_UNORM:
458         case WINED3DFMT_R8G8B8X8_UNORM:
459         case WINED3DFMT_B10G10R10A2_UNORM:
460         case WINED3DFMT_B5G6R5_UNORM:
461         case WINED3DFMT_R16G16B16A16_UNORM:
462             b_info->bmiHeader.biCompression = BI_BITFIELDS;
463             get_color_masks(format, masks);
464             break;
465
466         default:
467             /* Don't know palette */
468             b_info->bmiHeader.biCompression = BI_RGB;
469             break;
470     }
471
472     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
473             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
474             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
475     surface->dib.DIBsection = CreateDIBSection(0, b_info, DIB_RGB_COLORS, &surface->dib.bitmap_data, 0, 0);
476
477     if (!surface->dib.DIBsection)
478     {
479         ERR("Failed to create DIB section.\n");
480         HeapFree(GetProcessHeap(), 0, b_info);
481         return HRESULT_FROM_WIN32(GetLastError());
482     }
483
484     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
485     /* Copy the existing surface to the dib section. */
486     if (surface->resource.allocatedMemory)
487     {
488         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
489                 surface->resource.height * wined3d_surface_get_pitch(surface));
490     }
491     else
492     {
493         /* This is to make maps read the GL texture although memory is allocated. */
494         surface->flags &= ~SFLAG_INSYSMEM;
495     }
496     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
497
498     HeapFree(GetProcessHeap(), 0, b_info);
499
500     /* Now allocate a DC. */
501     surface->hDC = CreateCompatibleDC(0);
502     SelectObject(surface->hDC, surface->dib.DIBsection);
503     TRACE("Using wined3d palette %p.\n", surface->palette);
504     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
505
506     surface->flags |= SFLAG_DIBSECTION;
507
508     return WINED3D_OK;
509 }
510
511 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
512 {
513     if (surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
514         return FALSE;
515     if (!(surface->flags & SFLAG_DYNLOCK))
516         return FALSE;
517     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
518         return FALSE;
519     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
520         return FALSE;
521
522     return TRUE;
523 }
524
525 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
526 {
527     struct wined3d_context *context;
528     GLenum error;
529
530     context = context_acquire(surface->resource.device, NULL);
531
532     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
533     error = gl_info->gl_ops.gl.p_glGetError();
534     if (!surface->pbo || error != GL_NO_ERROR)
535         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
536
537     TRACE("Binding PBO %u.\n", surface->pbo);
538
539     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
540     checkGLcall("glBindBufferARB");
541
542     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
543             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
544     checkGLcall("glBufferDataARB");
545
546     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
547     checkGLcall("glBindBufferARB");
548
549     /* We don't need the system memory anymore and we can't even use it for PBOs. */
550     if (!(surface->flags & SFLAG_CLIENT))
551     {
552         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
553         surface->resource.heapMemory = NULL;
554     }
555     surface->resource.allocatedMemory = NULL;
556     surface->flags |= SFLAG_PBO;
557     context_release(context);
558 }
559
560 static void surface_prepare_system_memory(struct wined3d_surface *surface)
561 {
562     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
563
564     TRACE("surface %p.\n", surface);
565
566     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
567         surface_load_pbo(surface, gl_info);
568     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
569     {
570         /* Whatever surface we have, make sure that there is memory allocated
571          * for the downloaded copy, or a PBO to map. */
572         if (!surface->resource.heapMemory)
573             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
574
575         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
576                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
577
578         if (surface->flags & SFLAG_INSYSMEM)
579             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
580     }
581 }
582
583 static void surface_evict_sysmem(struct wined3d_surface *surface)
584 {
585     if (surface->resource.map_count || (surface->flags & SFLAG_DONOTFREE))
586         return;
587
588     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
589     surface->resource.allocatedMemory = NULL;
590     surface->resource.heapMemory = NULL;
591     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
592 }
593
594 /* Context activation is done by the caller. */
595 static void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
596 {
597     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
598
599     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
600     {
601         struct wined3d_texture *texture = surface->container.u.texture;
602
603         TRACE("Passing to container (%p).\n", texture);
604         texture->texture_ops->texture_bind(texture, context, srgb);
605     }
606     else
607     {
608         const struct wined3d_gl_info *gl_info = context->gl_info;
609
610         if (surface->texture_level)
611         {
612             ERR("Standalone surface %p is non-zero texture level %u.\n",
613                     surface, surface->texture_level);
614         }
615
616         if (srgb)
617             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
618
619         if (!surface->texture_name)
620         {
621             gl_info->gl_ops.gl.p_glGenTextures(1, &surface->texture_name);
622             checkGLcall("glGenTextures");
623
624             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
625
626             context_bind_texture(context, surface->texture_target, surface->texture_name);
627             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
628             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
629             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
630             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
631             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
632             checkGLcall("glTexParameteri");
633         }
634         else
635         {
636             context_bind_texture(context, surface->texture_target, surface->texture_name);
637         }
638     }
639 }
640
641 /* Context activation is done by the caller. */
642 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
643         struct wined3d_context *context, BOOL srgb)
644 {
645     struct wined3d_device *device = surface->resource.device;
646     DWORD active_sampler;
647
648     /* We don't need a specific texture unit, but after binding the texture
649      * the current unit is dirty. Read the unit back instead of switching to
650      * 0, this avoids messing around with the state manager's GL states. The
651      * current texture unit should always be a valid one.
652      *
653      * To be more specific, this is tricky because we can implicitly be
654      * called from sampler() in state.c. This means we can't touch anything
655      * other than whatever happens to be the currently active texture, or we
656      * would risk marking already applied sampler states dirty again. */
657     active_sampler = device->rev_tex_unit_map[context->active_texture];
658
659     if (active_sampler != WINED3D_UNMAPPED_STAGE)
660         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
661     surface_bind(surface, context, srgb);
662 }
663
664 static void surface_force_reload(struct wined3d_surface *surface)
665 {
666     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
667 }
668
669 static void surface_release_client_storage(struct wined3d_surface *surface)
670 {
671     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
672     const struct wined3d_gl_info *gl_info = context->gl_info;
673
674     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
675     if (surface->texture_name)
676     {
677         surface_bind_and_dirtify(surface, context, FALSE);
678         gl_info->gl_ops.gl.p_glTexImage2D(surface->texture_target, surface->texture_level,
679                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
680     }
681     if (surface->texture_name_srgb)
682     {
683         surface_bind_and_dirtify(surface, context, TRUE);
684         gl_info->gl_ops.gl.p_glTexImage2D(surface->texture_target, surface->texture_level,
685                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
686     }
687     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
688
689     context_release(context);
690
691     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
692     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
693     surface_force_reload(surface);
694 }
695
696 static HRESULT surface_private_setup(struct wined3d_surface *surface)
697 {
698     /* TODO: Check against the maximum texture sizes supported by the video card. */
699     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
700     unsigned int pow2Width, pow2Height;
701
702     TRACE("surface %p.\n", surface);
703
704     surface->texture_name = 0;
705     surface->texture_target = GL_TEXTURE_2D;
706
707     /* Non-power2 support */
708     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
709     {
710         pow2Width = surface->resource.width;
711         pow2Height = surface->resource.height;
712     }
713     else
714     {
715         /* Find the nearest pow2 match */
716         pow2Width = pow2Height = 1;
717         while (pow2Width < surface->resource.width)
718             pow2Width <<= 1;
719         while (pow2Height < surface->resource.height)
720             pow2Height <<= 1;
721     }
722     surface->pow2Width = pow2Width;
723     surface->pow2Height = pow2Height;
724
725     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
726     {
727         /* TODO: Add support for non power two compressed textures. */
728         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
729         {
730             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
731                   surface, surface->resource.width, surface->resource.height);
732             return WINED3DERR_NOTAVAILABLE;
733         }
734     }
735
736     if (pow2Width != surface->resource.width
737             || pow2Height != surface->resource.height)
738     {
739         surface->flags |= SFLAG_NONPOW2;
740     }
741
742     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
743             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
744     {
745         /* One of three options:
746          * 1: Do the same as we do with NPOT and scale the texture, (any
747          *    texture ops would require the texture to be scaled which is
748          *    potentially slow)
749          * 2: Set the texture to the maximum size (bad idea).
750          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
751          * 4: Create the surface, but allow it to be used only for DirectDraw
752          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
753          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
754          *    the render target. */
755         if (surface->resource.pool == WINED3D_POOL_DEFAULT || surface->resource.pool == WINED3D_POOL_MANAGED)
756         {
757             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
758             return WINED3DERR_NOTAVAILABLE;
759         }
760
761         /* We should never use this surface in combination with OpenGL! */
762         TRACE("Creating an oversized surface: %ux%u.\n",
763                 surface->pow2Width, surface->pow2Height);
764     }
765     else
766     {
767         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
768          * and EXT_PALETTED_TEXTURE is used in combination with texture
769          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
770          * EXT_PALETTED_TEXTURE doesn't work in combination with
771          * ARB_TEXTURE_RECTANGLE. */
772         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
773                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
774                 && gl_info->supported[EXT_PALETTED_TEXTURE]
775                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
776         {
777             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
778             surface->pow2Width = surface->resource.width;
779             surface->pow2Height = surface->resource.height;
780             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
781         }
782     }
783
784     switch (wined3d_settings.offscreen_rendering_mode)
785     {
786         case ORM_FBO:
787             surface->get_drawable_size = get_drawable_size_fbo;
788             break;
789
790         case ORM_BACKBUFFER:
791             surface->get_drawable_size = get_drawable_size_backbuffer;
792             break;
793
794         default:
795             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
796             return WINED3DERR_INVALIDCALL;
797     }
798
799     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
800         surface->flags |= SFLAG_DISCARDED;
801
802     return WINED3D_OK;
803 }
804
805 static void surface_realize_palette(struct wined3d_surface *surface)
806 {
807     struct wined3d_palette *palette = surface->palette;
808
809     TRACE("surface %p.\n", surface);
810
811     if (!palette) return;
812
813     if (surface->resource.format->id == WINED3DFMT_P8_UINT
814             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
815     {
816         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
817         {
818             /* Make sure the texture is up to date. This call doesn't do
819              * anything if the texture is already up to date. */
820             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
821
822             /* We want to force a palette refresh, so mark the drawable as not being up to date */
823             if (!surface_is_offscreen(surface))
824                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
825         }
826         else
827         {
828             if (!(surface->flags & SFLAG_INSYSMEM))
829             {
830                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
831                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
832             }
833             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
834         }
835     }
836
837     if (surface->flags & SFLAG_DIBSECTION)
838     {
839         RGBQUAD col[256];
840         unsigned int i;
841
842         TRACE("Updating the DC's palette.\n");
843
844         for (i = 0; i < 256; ++i)
845         {
846             col[i].rgbRed   = palette->palents[i].peRed;
847             col[i].rgbGreen = palette->palents[i].peGreen;
848             col[i].rgbBlue  = palette->palents[i].peBlue;
849             col[i].rgbReserved = 0;
850         }
851         SetDIBColorTable(surface->hDC, 0, 256, col);
852     }
853
854     /* Propagate the changes to the drawable when we have a palette. */
855     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
856         surface_load_location(surface, surface->draw_binding, NULL);
857 }
858
859 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
860 {
861     HRESULT hr;
862
863     /* If there's no destination surface there is nothing to do. */
864     if (!surface->overlay_dest)
865         return WINED3D_OK;
866
867     /* Blt calls ModifyLocation on the dest surface, which in turn calls
868      * DrawOverlay to update the overlay. Prevent an endless recursion. */
869     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
870         return WINED3D_OK;
871
872     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
873     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
874             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3D_TEXF_LINEAR);
875     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
876
877     return hr;
878 }
879
880 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
881 {
882     struct wined3d_device *device = surface->resource.device;
883     const RECT *pass_rect = rect;
884
885     TRACE("surface %p, rect %s, flags %#x.\n",
886             surface, wine_dbgstr_rect(rect), flags);
887
888     if (flags & WINED3D_MAP_DISCARD)
889     {
890         TRACE("WINED3D_MAP_DISCARD flag passed, marking SYSMEM as up to date.\n");
891         surface_prepare_system_memory(surface);
892         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
893     }
894     else
895     {
896         /* surface_load_location() does not check if the rectangle specifies
897          * the full surface. Most callers don't need that, so do it here. */
898         if (rect && !rect->top && !rect->left
899                 && rect->right == surface->resource.width
900                 && rect->bottom == surface->resource.height)
901             pass_rect = NULL;
902         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
903     }
904
905     if (surface->flags & SFLAG_PBO)
906     {
907         const struct wined3d_gl_info *gl_info;
908         struct wined3d_context *context;
909
910         context = context_acquire(device, NULL);
911         gl_info = context->gl_info;
912
913         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
914         checkGLcall("glBindBufferARB");
915
916         /* This shouldn't happen but could occur if some other function
917          * didn't handle the PBO properly. */
918         if (surface->resource.allocatedMemory)
919             ERR("The surface already has PBO memory allocated.\n");
920
921         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
922         checkGLcall("glMapBufferARB");
923
924         /* Make sure the PBO isn't set anymore in order not to break non-PBO
925          * calls. */
926         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
927         checkGLcall("glBindBufferARB");
928
929         context_release(context);
930     }
931
932     if (!(flags & (WINED3D_MAP_NO_DIRTY_UPDATE | WINED3D_MAP_READONLY)))
933     {
934         if (!rect)
935             surface_add_dirty_rect(surface, NULL);
936         else
937         {
938             struct wined3d_box b;
939
940             b.left = rect->left;
941             b.top = rect->top;
942             b.right = rect->right;
943             b.bottom = rect->bottom;
944             b.front = 0;
945             b.back = 1;
946             surface_add_dirty_rect(surface, &b);
947         }
948     }
949 }
950
951 static void surface_unmap(struct wined3d_surface *surface)
952 {
953     struct wined3d_device *device = surface->resource.device;
954     BOOL fullsurface;
955
956     TRACE("surface %p.\n", surface);
957
958     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
959
960     if (surface->flags & SFLAG_PBO)
961     {
962         const struct wined3d_gl_info *gl_info;
963         struct wined3d_context *context;
964
965         TRACE("Freeing PBO memory.\n");
966
967         context = context_acquire(device, NULL);
968         gl_info = context->gl_info;
969
970         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
971         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
972         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
973         checkGLcall("glUnmapBufferARB");
974         context_release(context);
975
976         surface->resource.allocatedMemory = NULL;
977     }
978
979     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
980
981     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
982     {
983         TRACE("Not dirtified, nothing to do.\n");
984         goto done;
985     }
986
987     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
988             && surface->container.u.swapchain->front_buffer == surface)
989     {
990         if (!surface->dirtyRect.left && !surface->dirtyRect.top
991                 && surface->dirtyRect.right == surface->resource.width
992                 && surface->dirtyRect.bottom == surface->resource.height)
993         {
994             fullsurface = TRUE;
995         }
996         else
997         {
998             /* TODO: Proper partial rectangle tracking. */
999             fullsurface = FALSE;
1000             surface->flags |= SFLAG_INSYSMEM;
1001         }
1002
1003         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
1004
1005         /* Partial rectangle tracking is not commonly implemented, it is only
1006          * done for render targets. INSYSMEM was set before to tell
1007          * surface_load_location() where to read the rectangle from.
1008          * Indrawable is set because all modifications from the partial
1009          * sysmem copy are written back to the drawable, thus the surface is
1010          * merged again in the drawable. The sysmem copy is not fully up to
1011          * date because only a subrectangle was read in Map(). */
1012         if (!fullsurface)
1013         {
1014             surface_modify_location(surface, surface->draw_binding, TRUE);
1015             surface_evict_sysmem(surface);
1016         }
1017
1018         surface->dirtyRect.left = surface->resource.width;
1019         surface->dirtyRect.top = surface->resource.height;
1020         surface->dirtyRect.right = 0;
1021         surface->dirtyRect.bottom = 0;
1022     }
1023     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1024     {
1025         FIXME("Depth / stencil buffer locking is not implemented.\n");
1026     }
1027
1028 done:
1029     /* Overlays have to be redrawn manually after changes with the GL implementation */
1030     if (surface->overlay_dest)
1031         surface_draw_overlay(surface);
1032 }
1033
1034 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1035 {
1036     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1037         return FALSE;
1038     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1039         return FALSE;
1040     return TRUE;
1041 }
1042
1043 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1044         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1045 {
1046     const struct wined3d_gl_info *gl_info;
1047     struct wined3d_context *context;
1048     DWORD src_mask, dst_mask;
1049     GLbitfield gl_mask;
1050
1051     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1052             device, src_surface, wine_dbgstr_rect(src_rect),
1053             dst_surface, wine_dbgstr_rect(dst_rect));
1054
1055     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1056     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1057
1058     if (src_mask != dst_mask)
1059     {
1060         ERR("Incompatible formats %s and %s.\n",
1061                 debug_d3dformat(src_surface->resource.format->id),
1062                 debug_d3dformat(dst_surface->resource.format->id));
1063         return;
1064     }
1065
1066     if (!src_mask)
1067     {
1068         ERR("Not a depth / stencil format: %s.\n",
1069                 debug_d3dformat(src_surface->resource.format->id));
1070         return;
1071     }
1072
1073     gl_mask = 0;
1074     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1075         gl_mask |= GL_DEPTH_BUFFER_BIT;
1076     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1077         gl_mask |= GL_STENCIL_BUFFER_BIT;
1078
1079     /* Make sure the locations are up-to-date. Loading the destination
1080      * surface isn't required if the entire surface is overwritten. */
1081     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1082     if (!surface_is_full_rect(dst_surface, dst_rect))
1083         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1084
1085     context = context_acquire(device, NULL);
1086     if (!context->valid)
1087     {
1088         context_release(context);
1089         WARN("Invalid context, skipping blit.\n");
1090         return;
1091     }
1092
1093     gl_info = context->gl_info;
1094
1095     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1096     gl_info->gl_ops.gl.p_glReadBuffer(GL_NONE);
1097     checkGLcall("glReadBuffer()");
1098     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1099
1100     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1101     context_set_draw_buffer(context, GL_NONE);
1102     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1103     context_invalidate_state(context, STATE_FRAMEBUFFER);
1104
1105     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1106     {
1107         gl_info->gl_ops.gl.p_glDepthMask(GL_TRUE);
1108         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1109     }
1110     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1111     {
1112         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1113         {
1114             gl_info->gl_ops.gl.p_glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1115             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1116         }
1117         gl_info->gl_ops.gl.p_glStencilMask(~0U);
1118         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1119     }
1120
1121     gl_info->gl_ops.gl.p_glDisable(GL_SCISSOR_TEST);
1122     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1123
1124     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1125             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1126     checkGLcall("glBlitFramebuffer()");
1127
1128     if (wined3d_settings.strict_draw_ordering)
1129         gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
1130
1131     context_release(context);
1132 }
1133
1134 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1135  * Depth / stencil is not supported. */
1136 static void surface_blt_fbo(const struct wined3d_device *device, enum wined3d_texture_filter_type filter,
1137         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1138         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1139 {
1140     const struct wined3d_gl_info *gl_info;
1141     struct wined3d_context *context;
1142     RECT src_rect, dst_rect;
1143     GLenum gl_filter;
1144     GLenum buffer;
1145
1146     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1147     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1148             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1149     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1150             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1151
1152     src_rect = *src_rect_in;
1153     dst_rect = *dst_rect_in;
1154
1155     switch (filter)
1156     {
1157         case WINED3D_TEXF_LINEAR:
1158             gl_filter = GL_LINEAR;
1159             break;
1160
1161         default:
1162             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1163         case WINED3D_TEXF_NONE:
1164         case WINED3D_TEXF_POINT:
1165             gl_filter = GL_NEAREST;
1166             break;
1167     }
1168
1169     /* Resolve the source surface first if needed. */
1170     if (src_location == SFLAG_INRB_MULTISAMPLE
1171             && (src_surface->resource.format->id != dst_surface->resource.format->id
1172                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1173                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1174         src_location = SFLAG_INRB_RESOLVED;
1175
1176     /* Make sure the locations are up-to-date. Loading the destination
1177      * surface isn't required if the entire surface is overwritten. (And is
1178      * in fact harmful if we're being called by surface_load_location() with
1179      * the purpose of loading the destination surface.) */
1180     surface_load_location(src_surface, src_location, NULL);
1181     if (!surface_is_full_rect(dst_surface, &dst_rect))
1182         surface_load_location(dst_surface, dst_location, NULL);
1183
1184     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1185     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1186     else context = context_acquire(device, NULL);
1187
1188     if (!context->valid)
1189     {
1190         context_release(context);
1191         WARN("Invalid context, skipping blit.\n");
1192         return;
1193     }
1194
1195     gl_info = context->gl_info;
1196
1197     if (src_location == SFLAG_INDRAWABLE)
1198     {
1199         TRACE("Source surface %p is onscreen.\n", src_surface);
1200         buffer = surface_get_gl_buffer(src_surface);
1201         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1202     }
1203     else
1204     {
1205         TRACE("Source surface %p is offscreen.\n", src_surface);
1206         buffer = GL_COLOR_ATTACHMENT0;
1207     }
1208
1209     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1210     gl_info->gl_ops.gl.p_glReadBuffer(buffer);
1211     checkGLcall("glReadBuffer()");
1212     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1213
1214     if (dst_location == SFLAG_INDRAWABLE)
1215     {
1216         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1217         buffer = surface_get_gl_buffer(dst_surface);
1218         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1219     }
1220     else
1221     {
1222         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1223         buffer = GL_COLOR_ATTACHMENT0;
1224     }
1225
1226     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1227     context_set_draw_buffer(context, buffer);
1228     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1229     context_invalidate_state(context, STATE_FRAMEBUFFER);
1230
1231     gl_info->gl_ops.gl.p_glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1232     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1233     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1234     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1235     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1236
1237     gl_info->gl_ops.gl.p_glDisable(GL_SCISSOR_TEST);
1238     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1239
1240     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1241             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1242     checkGLcall("glBlitFramebuffer()");
1243
1244     if (wined3d_settings.strict_draw_ordering
1245             || (dst_location == SFLAG_INDRAWABLE
1246             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1247         gl_info->gl_ops.gl.p_glFlush();
1248
1249     context_release(context);
1250 }
1251
1252 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1253         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
1254         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
1255 {
1256     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1257         return FALSE;
1258
1259     /* Source and/or destination need to be on the GL side */
1260     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
1261         return FALSE;
1262
1263     switch (blit_op)
1264     {
1265         case WINED3D_BLIT_OP_COLOR_BLIT:
1266             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1267                 return FALSE;
1268             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1269                 return FALSE;
1270             break;
1271
1272         case WINED3D_BLIT_OP_DEPTH_BLIT:
1273             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1274                 return FALSE;
1275             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1276                 return FALSE;
1277             break;
1278
1279         default:
1280             return FALSE;
1281     }
1282
1283     if (!(src_format->id == dst_format->id
1284             || (is_identity_fixup(src_format->color_fixup)
1285             && is_identity_fixup(dst_format->color_fixup))))
1286         return FALSE;
1287
1288     return TRUE;
1289 }
1290
1291 /* This function checks if the primary render target uses the 8bit paletted format. */
1292 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1293 {
1294     if (device->fb.render_targets && device->fb.render_targets[0])
1295     {
1296         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1297         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1298                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1299             return TRUE;
1300     }
1301     return FALSE;
1302 }
1303
1304 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1305         DWORD color, struct wined3d_color *float_color)
1306 {
1307     const struct wined3d_format *format = surface->resource.format;
1308     const struct wined3d_device *device = surface->resource.device;
1309
1310     switch (format->id)
1311     {
1312         case WINED3DFMT_P8_UINT:
1313             if (surface->palette)
1314             {
1315                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1316                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1317                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1318             }
1319             else
1320             {
1321                 float_color->r = 0.0f;
1322                 float_color->g = 0.0f;
1323                 float_color->b = 0.0f;
1324             }
1325             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1326             break;
1327
1328         case WINED3DFMT_B5G6R5_UNORM:
1329             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1330             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1331             float_color->b = (color & 0x1f) / 31.0f;
1332             float_color->a = 1.0f;
1333             break;
1334
1335         case WINED3DFMT_B8G8R8_UNORM:
1336         case WINED3DFMT_B8G8R8X8_UNORM:
1337             float_color->r = D3DCOLOR_R(color);
1338             float_color->g = D3DCOLOR_G(color);
1339             float_color->b = D3DCOLOR_B(color);
1340             float_color->a = 1.0f;
1341             break;
1342
1343         case WINED3DFMT_B8G8R8A8_UNORM:
1344             float_color->r = D3DCOLOR_R(color);
1345             float_color->g = D3DCOLOR_G(color);
1346             float_color->b = D3DCOLOR_B(color);
1347             float_color->a = D3DCOLOR_A(color);
1348             break;
1349
1350         default:
1351             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1352             return FALSE;
1353     }
1354
1355     return TRUE;
1356 }
1357
1358 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1359 {
1360     const struct wined3d_format *format = surface->resource.format;
1361
1362     switch (format->id)
1363     {
1364         case WINED3DFMT_S1_UINT_D15_UNORM:
1365             *float_depth = depth / (float)0x00007fff;
1366             break;
1367
1368         case WINED3DFMT_D16_UNORM:
1369             *float_depth = depth / (float)0x0000ffff;
1370             break;
1371
1372         case WINED3DFMT_D24_UNORM_S8_UINT:
1373         case WINED3DFMT_X8D24_UNORM:
1374             *float_depth = depth / (float)0x00ffffff;
1375             break;
1376
1377         case WINED3DFMT_D32_UNORM:
1378             *float_depth = depth / (float)0xffffffff;
1379             break;
1380
1381         default:
1382             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1383             return FALSE;
1384     }
1385
1386     return TRUE;
1387 }
1388
1389 /* Do not call while under the GL lock. */
1390 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1391 {
1392     const struct wined3d_resource *resource = &surface->resource;
1393     struct wined3d_device *device = resource->device;
1394     const struct blit_shader *blitter;
1395
1396     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1397             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1398     if (!blitter)
1399     {
1400         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1401         return WINED3DERR_INVALIDCALL;
1402     }
1403
1404     return blitter->depth_fill(device, surface, rect, depth);
1405 }
1406
1407 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1408         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1409 {
1410     struct wined3d_device *device = src_surface->resource.device;
1411
1412     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1413             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1414             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1415         return WINED3DERR_INVALIDCALL;
1416
1417     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1418
1419     surface_modify_ds_location(dst_surface, SFLAG_INTEXTURE,
1420             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1421
1422     return WINED3D_OK;
1423 }
1424
1425 /* Do not call while under the GL lock. */
1426 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1427         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1428         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
1429 {
1430     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1431     struct wined3d_device *device = dst_surface->resource.device;
1432     DWORD src_ds_flags, dst_ds_flags;
1433     RECT src_rect, dst_rect;
1434     BOOL scale, convert;
1435
1436     static const DWORD simple_blit = WINEDDBLT_ASYNC
1437             | WINEDDBLT_COLORFILL
1438             | WINEDDBLT_WAIT
1439             | WINEDDBLT_DEPTHFILL
1440             | WINEDDBLT_DONOTWAIT;
1441
1442     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1443             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1444             flags, fx, debug_d3dtexturefiltertype(filter));
1445     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1446
1447     if (fx)
1448     {
1449         TRACE("dwSize %#x.\n", fx->dwSize);
1450         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1451         TRACE("dwROP %#x.\n", fx->dwROP);
1452         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1453         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1454         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1455         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1456         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1457         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1458         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1459         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1460         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1461         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1462         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1463         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1464         TRACE("dwReserved %#x.\n", fx->dwReserved);
1465         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1466         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1467         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1468         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1469         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1470         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1471                 fx->ddckDestColorkey.color_space_low_value,
1472                 fx->ddckDestColorkey.color_space_high_value);
1473         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1474                 fx->ddckSrcColorkey.color_space_low_value,
1475                 fx->ddckSrcColorkey.color_space_high_value);
1476     }
1477
1478     if (dst_surface->resource.map_count || (src_surface && src_surface->resource.map_count))
1479     {
1480         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1481         return WINEDDERR_SURFACEBUSY;
1482     }
1483
1484     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1485
1486     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1487             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1488             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1489             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1490             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1491     {
1492         WARN("The application gave us a bad destination rectangle.\n");
1493         return WINEDDERR_INVALIDRECT;
1494     }
1495
1496     if (src_surface)
1497     {
1498         surface_get_rect(src_surface, src_rect_in, &src_rect);
1499
1500         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1501                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1502                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1503                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1504                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1505         {
1506             WARN("Application gave us bad source rectangle for Blt.\n");
1507             return WINEDDERR_INVALIDRECT;
1508         }
1509     }
1510     else
1511     {
1512         memset(&src_rect, 0, sizeof(src_rect));
1513     }
1514
1515     if (!fx || !(fx->dwDDFX))
1516         flags &= ~WINEDDBLT_DDFX;
1517
1518     if (flags & WINEDDBLT_WAIT)
1519         flags &= ~WINEDDBLT_WAIT;
1520
1521     if (flags & WINEDDBLT_ASYNC)
1522     {
1523         static unsigned int once;
1524
1525         if (!once++)
1526             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1527         flags &= ~WINEDDBLT_ASYNC;
1528     }
1529
1530     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1531     if (flags & WINEDDBLT_DONOTWAIT)
1532     {
1533         static unsigned int once;
1534
1535         if (!once++)
1536             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1537         flags &= ~WINEDDBLT_DONOTWAIT;
1538     }
1539
1540     if (!device->d3d_initialized)
1541     {
1542         WARN("D3D not initialized, using fallback.\n");
1543         goto cpu;
1544     }
1545
1546     /* We want to avoid invalidating the sysmem location for converted
1547      * surfaces, since otherwise we'd have to convert the data back when
1548      * locking them. */
1549     if (dst_surface->flags & SFLAG_CONVERTED)
1550     {
1551         WARN("Converted surface, using CPU blit.\n");
1552         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1553     }
1554
1555     if (flags & ~simple_blit)
1556     {
1557         WARN("Using fallback for complex blit (%#x).\n", flags);
1558         goto fallback;
1559     }
1560
1561     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1562         src_swapchain = src_surface->container.u.swapchain;
1563     else
1564         src_swapchain = NULL;
1565
1566     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1567         dst_swapchain = dst_surface->container.u.swapchain;
1568     else
1569         dst_swapchain = NULL;
1570
1571     /* This isn't strictly needed. FBO blits for example could deal with
1572      * cross-swapchain blits by first downloading the source to a texture
1573      * before switching to the destination context. We just have this here to
1574      * not have to deal with the issue, since cross-swapchain blits should be
1575      * rare. */
1576     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1577     {
1578         FIXME("Using fallback for cross-swapchain blit.\n");
1579         goto fallback;
1580     }
1581
1582     scale = src_surface
1583             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1584             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1585     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1586
1587     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1588     if (src_surface)
1589         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1590     else
1591         src_ds_flags = 0;
1592
1593     if (src_ds_flags || dst_ds_flags)
1594     {
1595         if (flags & WINEDDBLT_DEPTHFILL)
1596         {
1597             float depth;
1598
1599             TRACE("Depth fill.\n");
1600
1601             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1602                 return WINED3DERR_INVALIDCALL;
1603
1604             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1605                 return WINED3D_OK;
1606         }
1607         else
1608         {
1609             if (src_ds_flags != dst_ds_flags)
1610             {
1611                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1612                 return WINED3DERR_INVALIDCALL;
1613             }
1614
1615             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1616                 return WINED3D_OK;
1617         }
1618     }
1619     else
1620     {
1621         /* In principle this would apply to depth blits as well, but we don't
1622          * implement those in the CPU blitter at the moment. */
1623         if ((dst_surface->flags & SFLAG_INSYSMEM)
1624                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1625         {
1626             if (scale)
1627                 TRACE("Not doing sysmem blit because of scaling.\n");
1628             else if (convert)
1629                 TRACE("Not doing sysmem blit because of format conversion.\n");
1630             else
1631                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1632         }
1633
1634         if (flags & WINEDDBLT_COLORFILL)
1635         {
1636             struct wined3d_color color;
1637
1638             TRACE("Color fill.\n");
1639
1640             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1641                 goto fallback;
1642
1643             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1644                 return WINED3D_OK;
1645         }
1646         else
1647         {
1648             TRACE("Color blit.\n");
1649
1650             /* Upload */
1651             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1652             {
1653                 if (scale)
1654                     TRACE("Not doing upload because of scaling.\n");
1655                 else if (convert)
1656                     TRACE("Not doing upload because of format conversion.\n");
1657                 else
1658                 {
1659                     POINT dst_point = {dst_rect.left, dst_rect.top};
1660
1661                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1662                     {
1663                         if (!surface_is_offscreen(dst_surface))
1664                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1665                         return WINED3D_OK;
1666                     }
1667                 }
1668             }
1669
1670             /* Use present for back -> front blits. The idea behind this is
1671              * that present is potentially faster than a blit, in particular
1672              * when FBO blits aren't available. Some ddraw applications like
1673              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1674              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1675              * applications can't blit directly to the frontbuffer. */
1676             if (dst_swapchain && dst_swapchain->back_buffers
1677                     && dst_surface == dst_swapchain->front_buffer
1678                     && src_surface == dst_swapchain->back_buffers[0])
1679             {
1680                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1681
1682                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1683
1684                 /* Set the swap effect to COPY, we don't want the backbuffer
1685                  * to become undefined. */
1686                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1687                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1688                 dst_swapchain->desc.swap_effect = swap_effect;
1689
1690                 return WINED3D_OK;
1691             }
1692
1693             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1694                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1695                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1696             {
1697                 TRACE("Using FBO blit.\n");
1698
1699                 surface_blt_fbo(device, filter,
1700                         src_surface, src_surface->draw_binding, &src_rect,
1701                         dst_surface, dst_surface->draw_binding, &dst_rect);
1702                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1703                 return WINED3D_OK;
1704             }
1705
1706             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1707                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1708                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1709             {
1710                 TRACE("Using arbfp blit.\n");
1711
1712                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1713                     return WINED3D_OK;
1714             }
1715         }
1716     }
1717
1718 fallback:
1719
1720     /* Special cases for render targets. */
1721     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1722             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1723     {
1724         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1725                 src_surface, &src_rect, flags, fx, filter)))
1726             return WINED3D_OK;
1727     }
1728
1729 cpu:
1730
1731     /* For the rest call the X11 surface implementation. For render targets
1732      * this should be implemented OpenGL accelerated in BltOverride, other
1733      * blits are rather rare. */
1734     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1735 }
1736
1737 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1738         struct wined3d_surface *render_target)
1739 {
1740     TRACE("surface %p, render_target %p.\n", surface, render_target);
1741
1742     /* TODO: Check surface sizes, pools, etc. */
1743
1744     if (render_target->resource.multisample_type)
1745         return WINED3DERR_INVALIDCALL;
1746
1747     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3D_TEXF_POINT);
1748 }
1749
1750 /* Context activation is done by the caller. */
1751 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1752 {
1753     if (surface->flags & SFLAG_DIBSECTION)
1754     {
1755         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1756     }
1757     else
1758     {
1759         if (!surface->resource.heapMemory)
1760             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1761         else if (!(surface->flags & SFLAG_CLIENT))
1762             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1763                     surface, surface->resource.heapMemory, surface->flags);
1764
1765         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1766                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1767     }
1768
1769     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1770     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1771     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1772             surface->resource.size, surface->resource.allocatedMemory));
1773     checkGLcall("glGetBufferSubDataARB");
1774     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1775     checkGLcall("glDeleteBuffersARB");
1776
1777     surface->pbo = 0;
1778     surface->flags &= ~SFLAG_PBO;
1779 }
1780
1781 static BOOL surface_init_sysmem(struct wined3d_surface *surface)
1782 {
1783     if (!surface->resource.allocatedMemory)
1784     {
1785         if (!surface->resource.heapMemory)
1786         {
1787             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
1788                     surface->resource.size + RESOURCE_ALIGNMENT)))
1789             {
1790                 ERR("Failed to allocate memory.\n");
1791                 return FALSE;
1792             }
1793         }
1794         else if (!(surface->flags & SFLAG_CLIENT))
1795         {
1796             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1797                     surface, surface->resource.heapMemory, surface->flags);
1798         }
1799
1800         surface->resource.allocatedMemory =
1801             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1802     }
1803     else
1804     {
1805         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
1806     }
1807
1808     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1809
1810     return TRUE;
1811 }
1812
1813 /* Do not call while under the GL lock. */
1814 static void surface_unload(struct wined3d_resource *resource)
1815 {
1816     struct wined3d_surface *surface = surface_from_resource(resource);
1817     struct wined3d_renderbuffer_entry *entry, *entry2;
1818     struct wined3d_device *device = resource->device;
1819     const struct wined3d_gl_info *gl_info;
1820     struct wined3d_context *context;
1821
1822     TRACE("surface %p.\n", surface);
1823
1824     if (resource->pool == WINED3D_POOL_DEFAULT)
1825     {
1826         /* Default pool resources are supposed to be destroyed before Reset is called.
1827          * Implicit resources stay however. So this means we have an implicit render target
1828          * or depth stencil. The content may be destroyed, but we still have to tear down
1829          * opengl resources, so we cannot leave early.
1830          *
1831          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1832          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1833          * or the depth stencil into an FBO the texture or render buffer will be removed
1834          * and all flags get lost
1835          */
1836         if (!(surface->flags & SFLAG_PBO))
1837             surface_init_sysmem(surface);
1838         /* We also get here when the ddraw swapchain is destroyed, for example
1839          * for a mode switch. In this case this surface won't necessarily be
1840          * an implicit surface. We have to mark it lost so that the
1841          * application can restore it after the mode switch. */
1842         surface->flags |= SFLAG_LOST;
1843     }
1844     else
1845     {
1846         /* Load the surface into system memory */
1847         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1848         surface_modify_location(surface, surface->draw_binding, FALSE);
1849     }
1850     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1851     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1852     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1853
1854     context = context_acquire(device, NULL);
1855     gl_info = context->gl_info;
1856
1857     /* Destroy PBOs, but load them into real sysmem before */
1858     if (surface->flags & SFLAG_PBO)
1859         surface_remove_pbo(surface, gl_info);
1860
1861     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1862      * all application-created targets the application has to release the surface
1863      * before calling _Reset
1864      */
1865     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1866     {
1867         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1868         list_remove(&entry->entry);
1869         HeapFree(GetProcessHeap(), 0, entry);
1870     }
1871     list_init(&surface->renderbuffers);
1872     surface->current_renderbuffer = NULL;
1873
1874     /* If we're in a texture, the texture name belongs to the texture.
1875      * Otherwise, destroy it. */
1876     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1877     {
1878         gl_info->gl_ops.gl.p_glDeleteTextures(1, &surface->texture_name);
1879         surface->texture_name = 0;
1880         gl_info->gl_ops.gl.p_glDeleteTextures(1, &surface->texture_name_srgb);
1881         surface->texture_name_srgb = 0;
1882     }
1883     if (surface->rb_multisample)
1884     {
1885         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1886         surface->rb_multisample = 0;
1887     }
1888     if (surface->rb_resolved)
1889     {
1890         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1891         surface->rb_resolved = 0;
1892     }
1893
1894     context_release(context);
1895
1896     resource_unload(resource);
1897 }
1898
1899 static const struct wined3d_resource_ops surface_resource_ops =
1900 {
1901     surface_unload,
1902 };
1903
1904 static const struct wined3d_surface_ops surface_ops =
1905 {
1906     surface_private_setup,
1907     surface_realize_palette,
1908     surface_map,
1909     surface_unmap,
1910 };
1911
1912 /*****************************************************************************
1913  * Initializes the GDI surface, aka creates the DIB section we render to
1914  * The DIB section creation is done by calling GetDC, which will create the
1915  * section and releasing the dc to allow the app to use it. The dib section
1916  * will stay until the surface is released
1917  *
1918  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1919  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1920  * avoid confusion in the shared surface code.
1921  *
1922  * Returns:
1923  *  WINED3D_OK on success
1924  *  The return values of called methods on failure
1925  *
1926  *****************************************************************************/
1927 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1928 {
1929     HRESULT hr;
1930
1931     TRACE("surface %p.\n", surface);
1932
1933     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1934     {
1935         ERR("Overlays not yet supported by GDI surfaces.\n");
1936         return WINED3DERR_INVALIDCALL;
1937     }
1938
1939     /* Sysmem textures have memory already allocated - release it,
1940      * this avoids an unnecessary memcpy. */
1941     hr = surface_create_dib_section(surface);
1942     if (SUCCEEDED(hr))
1943     {
1944         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1945         surface->resource.heapMemory = NULL;
1946         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1947     }
1948
1949     /* We don't mind the nonpow2 stuff in GDI. */
1950     surface->pow2Width = surface->resource.width;
1951     surface->pow2Height = surface->resource.height;
1952
1953     return WINED3D_OK;
1954 }
1955
1956 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1957 {
1958     struct wined3d_palette *palette = surface->palette;
1959
1960     TRACE("surface %p.\n", surface);
1961
1962     if (!palette) return;
1963
1964     if (surface->flags & SFLAG_DIBSECTION)
1965     {
1966         RGBQUAD col[256];
1967         unsigned int i;
1968
1969         TRACE("Updating the DC's palette.\n");
1970
1971         for (i = 0; i < 256; ++i)
1972         {
1973             col[i].rgbRed = palette->palents[i].peRed;
1974             col[i].rgbGreen = palette->palents[i].peGreen;
1975             col[i].rgbBlue = palette->palents[i].peBlue;
1976             col[i].rgbReserved = 0;
1977         }
1978         SetDIBColorTable(surface->hDC, 0, 256, col);
1979     }
1980
1981     /* Update the image because of the palette change. Some games like e.g.
1982      * Red Alert call SetEntries a lot to implement fading. */
1983     /* Tell the swapchain to update the screen. */
1984     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1985     {
1986         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1987         if (surface == swapchain->front_buffer)
1988         {
1989             x11_copy_to_screen(swapchain, NULL);
1990         }
1991     }
1992 }
1993
1994 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1995 {
1996     TRACE("surface %p, rect %s, flags %#x.\n",
1997             surface, wine_dbgstr_rect(rect), flags);
1998
1999     if (!(surface->flags & SFLAG_DIBSECTION))
2000     {
2001         HRESULT hr;
2002
2003         /* This happens on gdi surfaces if the application set a user pointer
2004          * and resets it. Recreate the DIB section. */
2005         if (FAILED(hr = surface_create_dib_section(surface)))
2006         {
2007             ERR("Failed to create dib section, hr %#x.\n", hr);
2008             return;
2009         }
2010         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
2011         surface->resource.heapMemory = NULL;
2012         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2013     }
2014 }
2015
2016 static void gdi_surface_unmap(struct wined3d_surface *surface)
2017 {
2018     TRACE("surface %p.\n", surface);
2019
2020     /* Tell the swapchain to update the screen. */
2021     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2022     {
2023         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2024         if (surface == swapchain->front_buffer)
2025         {
2026             x11_copy_to_screen(swapchain, &surface->lockedRect);
2027         }
2028     }
2029
2030     memset(&surface->lockedRect, 0, sizeof(RECT));
2031 }
2032
2033 static const struct wined3d_surface_ops gdi_surface_ops =
2034 {
2035     gdi_surface_private_setup,
2036     gdi_surface_realize_palette,
2037     gdi_surface_map,
2038     gdi_surface_unmap,
2039 };
2040
2041 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2042 {
2043     GLuint *name;
2044     DWORD flag;
2045
2046     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2047
2048     if(srgb)
2049     {
2050         name = &surface->texture_name_srgb;
2051         flag = SFLAG_INSRGBTEX;
2052     }
2053     else
2054     {
2055         name = &surface->texture_name;
2056         flag = SFLAG_INTEXTURE;
2057     }
2058
2059     if (!*name && new_name)
2060     {
2061         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2062          * surface has no texture name yet. See if we can get rid of this. */
2063         if (surface->flags & flag)
2064         {
2065             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2066             surface_modify_location(surface, flag, FALSE);
2067         }
2068     }
2069
2070     *name = new_name;
2071     surface_force_reload(surface);
2072 }
2073
2074 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target, GLint level)
2075 {
2076     TRACE("surface %p, target %#x.\n", surface, target);
2077
2078     if (surface->texture_target != target)
2079     {
2080         if (target == GL_TEXTURE_RECTANGLE_ARB)
2081         {
2082             surface->flags &= ~SFLAG_NORMCOORD;
2083         }
2084         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2085         {
2086             surface->flags |= SFLAG_NORMCOORD;
2087         }
2088     }
2089     surface->texture_target = target;
2090     surface->texture_level = level;
2091     surface_force_reload(surface);
2092 }
2093
2094 /* This call just downloads data, the caller is responsible for binding the
2095  * correct texture. */
2096 /* Context activation is done by the caller. */
2097 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2098 {
2099     const struct wined3d_format *format = surface->resource.format;
2100
2101     /* Only support read back of converted P8 surfaces. */
2102     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2103     {
2104         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2105         return;
2106     }
2107
2108     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2109     {
2110         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2111                 surface, surface->texture_level, format->glFormat, format->glType,
2112                 surface->resource.allocatedMemory);
2113
2114         if (surface->flags & SFLAG_PBO)
2115         {
2116             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2117             checkGLcall("glBindBufferARB");
2118             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2119             checkGLcall("glGetCompressedTexImageARB");
2120             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2121             checkGLcall("glBindBufferARB");
2122         }
2123         else
2124         {
2125             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2126                     surface->texture_level, surface->resource.allocatedMemory));
2127             checkGLcall("glGetCompressedTexImageARB");
2128         }
2129     }
2130     else
2131     {
2132         void *mem;
2133         GLenum gl_format = format->glFormat;
2134         GLenum gl_type = format->glType;
2135         int src_pitch = 0;
2136         int dst_pitch = 0;
2137
2138         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2139         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2140         {
2141             gl_format = GL_ALPHA;
2142             gl_type = GL_UNSIGNED_BYTE;
2143         }
2144
2145         if (surface->flags & SFLAG_NONPOW2)
2146         {
2147             unsigned char alignment = surface->resource.device->surface_alignment;
2148             src_pitch = format->byte_count * surface->pow2Width;
2149             dst_pitch = wined3d_surface_get_pitch(surface);
2150             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2151             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2152         }
2153         else
2154         {
2155             mem = surface->resource.allocatedMemory;
2156         }
2157
2158         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2159                 surface, surface->texture_level, gl_format, gl_type, mem);
2160
2161         if (surface->flags & SFLAG_PBO)
2162         {
2163             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2164             checkGLcall("glBindBufferARB");
2165
2166             gl_info->gl_ops.gl.p_glGetTexImage(surface->texture_target, surface->texture_level,
2167                     gl_format, gl_type, NULL);
2168             checkGLcall("glGetTexImage");
2169
2170             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2171             checkGLcall("glBindBufferARB");
2172         }
2173         else
2174         {
2175             gl_info->gl_ops.gl.p_glGetTexImage(surface->texture_target, surface->texture_level,
2176                     gl_format, gl_type, mem);
2177             checkGLcall("glGetTexImage");
2178         }
2179
2180         if (surface->flags & SFLAG_NONPOW2)
2181         {
2182             const BYTE *src_data;
2183             BYTE *dst_data;
2184             UINT y;
2185             /*
2186              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2187              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2188              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2189              *
2190              * We're doing this...
2191              *
2192              * instead of boxing the texture :
2193              * |<-texture width ->|  -->pow2width|   /\
2194              * |111111111111111111|              |   |
2195              * |222 Texture 222222| boxed empty  | texture height
2196              * |3333 Data 33333333|              |   |
2197              * |444444444444444444|              |   \/
2198              * -----------------------------------   |
2199              * |     boxed  empty | boxed empty  | pow2height
2200              * |                  |              |   \/
2201              * -----------------------------------
2202              *
2203              *
2204              * we're repacking the data to the expected texture width
2205              *
2206              * |<-texture width ->|  -->pow2width|   /\
2207              * |111111111111111111222222222222222|   |
2208              * |222333333333333333333444444444444| texture height
2209              * |444444                           |   |
2210              * |                                 |   \/
2211              * |                                 |   |
2212              * |            empty                | pow2height
2213              * |                                 |   \/
2214              * -----------------------------------
2215              *
2216              * == is the same as
2217              *
2218              * |<-texture width ->|    /\
2219              * |111111111111111111|
2220              * |222222222222222222|texture height
2221              * |333333333333333333|
2222              * |444444444444444444|    \/
2223              * --------------------
2224              *
2225              * this also means that any references to allocatedMemory should work with the data as if were a
2226              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2227              *
2228              * internally the texture is still stored in a boxed format so any references to textureName will
2229              * get a boxed texture with width pow2width and not a texture of width resource.width.
2230              *
2231              * Performance should not be an issue, because applications normally do not lock the surfaces when
2232              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2233              * and doesn't have to be re-read. */
2234             src_data = mem;
2235             dst_data = surface->resource.allocatedMemory;
2236             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2237             for (y = 1; y < surface->resource.height; ++y)
2238             {
2239                 /* skip the first row */
2240                 src_data += src_pitch;
2241                 dst_data += dst_pitch;
2242                 memcpy(dst_data, src_data, dst_pitch);
2243             }
2244
2245             HeapFree(GetProcessHeap(), 0, mem);
2246         }
2247     }
2248
2249     /* Surface has now been downloaded */
2250     surface->flags |= SFLAG_INSYSMEM;
2251 }
2252
2253 /* This call just uploads data, the caller is responsible for binding the
2254  * correct texture. */
2255 /* Context activation is done by the caller. */
2256 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2257         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2258         BOOL srgb, const struct wined3d_bo_address *data)
2259 {
2260     UINT update_w = src_rect->right - src_rect->left;
2261     UINT update_h = src_rect->bottom - src_rect->top;
2262
2263     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2264             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2265             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2266
2267     if (surface->resource.map_count)
2268     {
2269         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2270         surface->flags |= SFLAG_PIN_SYSMEM;
2271     }
2272
2273     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2274     {
2275         update_h *= format->height_scale.numerator;
2276         update_h /= format->height_scale.denominator;
2277     }
2278
2279     if (data->buffer_object)
2280     {
2281         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2282         checkGLcall("glBindBufferARB");
2283     }
2284
2285     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2286     {
2287         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2288         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2289         const BYTE *addr = data->addr;
2290         GLenum internal;
2291
2292         addr += (src_rect->top / format->block_height) * src_pitch;
2293         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2294
2295         if (srgb)
2296             internal = format->glGammaInternal;
2297         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2298             internal = format->rtInternal;
2299         else
2300             internal = format->glInternal;
2301
2302         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2303                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2304                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2305
2306         if (row_length == src_pitch)
2307         {
2308             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2309                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2310         }
2311         else
2312         {
2313             UINT row, y;
2314
2315             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2316              * can't use the unpack row length like below. */
2317             for (row = 0, y = dst_point->y; row < row_count; ++row)
2318             {
2319                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2320                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2321                 y += format->block_height;
2322                 addr += src_pitch;
2323             }
2324         }
2325         checkGLcall("glCompressedTexSubImage2DARB");
2326     }
2327     else
2328     {
2329         const BYTE *addr = data->addr;
2330
2331         addr += src_rect->top * src_pitch;
2332         addr += src_rect->left * format->byte_count;
2333
2334         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2335                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2336                 update_w, update_h, format->glFormat, format->glType, addr);
2337
2338         gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2339         gl_info->gl_ops.gl.p_glTexSubImage2D(surface->texture_target, surface->texture_level,
2340                 dst_point->x, dst_point->y, update_w, update_h, format->glFormat, format->glType, addr);
2341         gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2342         checkGLcall("glTexSubImage2D");
2343     }
2344
2345     if (data->buffer_object)
2346     {
2347         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2348         checkGLcall("glBindBufferARB");
2349     }
2350
2351     if (wined3d_settings.strict_draw_ordering)
2352         gl_info->gl_ops.gl.p_glFlush();
2353
2354     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2355     {
2356         struct wined3d_device *device = surface->resource.device;
2357         unsigned int i;
2358
2359         for (i = 0; i < device->context_count; ++i)
2360         {
2361             context_surface_update(device->contexts[i], surface);
2362         }
2363     }
2364 }
2365
2366 static HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck, BOOL use_texturing,
2367         struct wined3d_format *format, enum wined3d_conversion_type *conversion_type)
2368 {
2369     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2370     const struct wined3d_device *device = surface->resource.device;
2371     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
2372     BOOL blit_supported = FALSE;
2373
2374     /* Copy the default values from the surface. Below we might perform fixups */
2375     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
2376     *format = *surface->resource.format;
2377     *conversion_type = WINED3D_CT_NONE;
2378
2379     /* Ok, now look if we have to do any conversion */
2380     switch (surface->resource.format->id)
2381     {
2382         case WINED3DFMT_P8_UINT:
2383             /* Below the call to blit_supported is disabled for Wine 1.2
2384              * because the function isn't operating correctly yet. At the
2385              * moment 8-bit blits are handled in software and if certain GL
2386              * extensions are around, surface conversion is performed at
2387              * upload time. The blit_supported call recognizes it as a
2388              * destination fixup. This type of upload 'fixup' and 8-bit to
2389              * 8-bit blits need to be handled by the blit_shader.
2390              * TODO: get rid of this #if 0. */
2391 #if 0
2392             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
2393                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
2394                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
2395 #endif
2396             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
2397
2398             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
2399              * texturing. Further also use conversion in case of color keying.
2400              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
2401              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
2402              * conflicts with this.
2403              */
2404             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
2405                     || colorkey_active || !use_texturing)
2406             {
2407                 format->glFormat = GL_RGBA;
2408                 format->glInternal = GL_RGBA;
2409                 format->glType = GL_UNSIGNED_BYTE;
2410                 format->conv_byte_count = 4;
2411                 if (colorkey_active)
2412                     *conversion_type = WINED3D_CT_PALETTED_CK;
2413                 else
2414                     *conversion_type = WINED3D_CT_PALETTED;
2415             }
2416             break;
2417
2418         case WINED3DFMT_B2G3R3_UNORM:
2419             /* **********************
2420                 GL_UNSIGNED_BYTE_3_3_2
2421                 ********************** */
2422             if (colorkey_active) {
2423                 /* This texture format will never be used.. So do not care about color keying
2424                     up until the point in time it will be needed :-) */
2425                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
2426             }
2427             break;
2428
2429         case WINED3DFMT_B5G6R5_UNORM:
2430             if (colorkey_active)
2431             {
2432                 *conversion_type = WINED3D_CT_CK_565;
2433                 format->glFormat = GL_RGBA;
2434                 format->glInternal = GL_RGB5_A1;
2435                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
2436                 format->conv_byte_count = 2;
2437             }
2438             break;
2439
2440         case WINED3DFMT_B5G5R5X1_UNORM:
2441             if (colorkey_active)
2442             {
2443                 *conversion_type = WINED3D_CT_CK_5551;
2444                 format->glFormat = GL_BGRA;
2445                 format->glInternal = GL_RGB5_A1;
2446                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
2447                 format->conv_byte_count = 2;
2448             }
2449             break;
2450
2451         case WINED3DFMT_B8G8R8_UNORM:
2452             if (colorkey_active)
2453             {
2454                 *conversion_type = WINED3D_CT_CK_RGB24;
2455                 format->glFormat = GL_RGBA;
2456                 format->glInternal = GL_RGBA8;
2457                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
2458                 format->conv_byte_count = 4;
2459             }
2460             break;
2461
2462         case WINED3DFMT_B8G8R8X8_UNORM:
2463             if (colorkey_active)
2464             {
2465                 *conversion_type = WINED3D_CT_RGB32_888;
2466                 format->glFormat = GL_RGBA;
2467                 format->glInternal = GL_RGBA8;
2468                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
2469                 format->conv_byte_count = 4;
2470             }
2471             break;
2472
2473         case WINED3DFMT_B8G8R8A8_UNORM:
2474             if (colorkey_active)
2475             {
2476                 *conversion_type = WINED3D_CT_CK_ARGB32;
2477                 format->conv_byte_count = 4;
2478             }
2479             break;
2480
2481         default:
2482             break;
2483     }
2484
2485     if (*conversion_type != WINED3D_CT_NONE)
2486     {
2487         format->rtInternal = format->glInternal;
2488         format->glGammaInternal = format->glInternal;
2489     }
2490
2491     return WINED3D_OK;
2492 }
2493
2494 static BOOL surface_check_block_align(struct wined3d_surface *surface, const RECT *rect)
2495 {
2496     UINT width_mask, height_mask;
2497
2498     if (!rect->left && !rect->top
2499             && rect->right == surface->resource.width
2500             && rect->bottom == surface->resource.height)
2501         return TRUE;
2502
2503     /* This assumes power of two block sizes, but NPOT block sizes would be
2504      * silly anyway. */
2505     width_mask = surface->resource.format->block_width - 1;
2506     height_mask = surface->resource.format->block_height - 1;
2507
2508     if (!(rect->left & width_mask) && !(rect->top & height_mask)
2509             && !(rect->right & width_mask) && !(rect->bottom & height_mask))
2510         return TRUE;
2511
2512     return FALSE;
2513 }
2514
2515 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2516         struct wined3d_surface *src_surface, const RECT *src_rect)
2517 {
2518     const struct wined3d_format *src_format;
2519     const struct wined3d_format *dst_format;
2520     const struct wined3d_gl_info *gl_info;
2521     enum wined3d_conversion_type convert;
2522     struct wined3d_context *context;
2523     struct wined3d_bo_address data;
2524     struct wined3d_format format;
2525     UINT update_w, update_h;
2526     UINT dst_w, dst_h;
2527     RECT r, dst_rect;
2528     UINT src_pitch;
2529     POINT p;
2530
2531     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2532             dst_surface, wine_dbgstr_point(dst_point),
2533             src_surface, wine_dbgstr_rect(src_rect));
2534
2535     src_format = src_surface->resource.format;
2536     dst_format = dst_surface->resource.format;
2537
2538     if (src_format->id != dst_format->id)
2539     {
2540         WARN("Source and destination surfaces should have the same format.\n");
2541         return WINED3DERR_INVALIDCALL;
2542     }
2543
2544     if (!dst_point)
2545     {
2546         p.x = 0;
2547         p.y = 0;
2548         dst_point = &p;
2549     }
2550     else if (dst_point->x < 0 || dst_point->y < 0)
2551     {
2552         WARN("Invalid destination point.\n");
2553         return WINED3DERR_INVALIDCALL;
2554     }
2555
2556     if (!src_rect)
2557     {
2558         r.left = 0;
2559         r.top = 0;
2560         r.right = src_surface->resource.width;
2561         r.bottom = src_surface->resource.height;
2562         src_rect = &r;
2563     }
2564     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2565             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2566     {
2567         WARN("Invalid source rectangle.\n");
2568         return WINED3DERR_INVALIDCALL;
2569     }
2570
2571     dst_w = dst_surface->resource.width;
2572     dst_h = dst_surface->resource.height;
2573
2574     update_w = src_rect->right - src_rect->left;
2575     update_h = src_rect->bottom - src_rect->top;
2576
2577     if (update_w > dst_w || dst_point->x > dst_w - update_w
2578             || update_h > dst_h || dst_point->y > dst_h - update_h)
2579     {
2580         WARN("Destination out of bounds.\n");
2581         return WINED3DERR_INVALIDCALL;
2582     }
2583
2584     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS) && !surface_check_block_align(src_surface, src_rect))
2585     {
2586         WARN("Source rectangle not block-aligned.\n");
2587         return WINED3DERR_INVALIDCALL;
2588     }
2589
2590     SetRect(&dst_rect, dst_point->x, dst_point->y, dst_point->x + update_w, dst_point->y + update_h);
2591     if ((dst_format->flags & WINED3DFMT_FLAG_BLOCKS) && !surface_check_block_align(dst_surface, &dst_rect))
2592     {
2593         WARN("Destination rectangle not block-aligned.\n");
2594         return WINED3DERR_INVALIDCALL;
2595     }
2596
2597     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2598     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2599     if (convert != WINED3D_CT_NONE || format.convert)
2600         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3D_TEXF_POINT);
2601
2602     context = context_acquire(dst_surface->resource.device, NULL);
2603     gl_info = context->gl_info;
2604
2605     /* Only load the surface for partial updates. For newly allocated texture
2606      * the texture wouldn't be the current location, and we'd upload zeroes
2607      * just to overwrite them again. */
2608     if (update_w == dst_w && update_h == dst_h)
2609         surface_prepare_texture(dst_surface, context, FALSE);
2610     else
2611         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2612     surface_bind(dst_surface, context, FALSE);
2613
2614     data.buffer_object = src_surface->pbo;
2615     data.addr = src_surface->resource.allocatedMemory;
2616     src_pitch = wined3d_surface_get_pitch(src_surface);
2617
2618     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2619
2620     invalidate_active_texture(dst_surface->resource.device, context);
2621
2622     context_release(context);
2623
2624     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2625     return WINED3D_OK;
2626 }
2627
2628 /* This call just allocates the texture, the caller is responsible for binding
2629  * the correct texture. */
2630 /* Context activation is done by the caller. */
2631 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2632         const struct wined3d_format *format, BOOL srgb)
2633 {
2634     BOOL enable_client_storage = FALSE;
2635     GLsizei width = surface->pow2Width;
2636     GLsizei height = surface->pow2Height;
2637     const BYTE *mem = NULL;
2638     GLenum internal;
2639
2640     if (srgb)
2641     {
2642         internal = format->glGammaInternal;
2643     }
2644     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2645     {
2646         internal = format->rtInternal;
2647     }
2648     else
2649     {
2650         internal = format->glInternal;
2651     }
2652
2653     if (!internal)
2654         FIXME("No GL internal format for format %s.\n", debug_d3dformat(format->id));
2655
2656     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2657     {
2658         height *= format->height_scale.numerator;
2659         height /= format->height_scale.denominator;
2660     }
2661
2662     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2663             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2664             internal, width, height, format->glFormat, format->glType);
2665
2666     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2667     {
2668         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2669                 || !surface->resource.allocatedMemory)
2670         {
2671             /* In some cases we want to disable client storage.
2672              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2673              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2674              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2675              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2676              */
2677             gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2678             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2679             surface->flags &= ~SFLAG_CLIENT;
2680             enable_client_storage = TRUE;
2681         }
2682         else
2683         {
2684             surface->flags |= SFLAG_CLIENT;
2685
2686             /* Point OpenGL to our allocated texture memory. Do not use
2687              * resource.allocatedMemory here because it might point into a
2688              * PBO. Instead use heapMemory, but get the alignment right. */
2689             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2690                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2691         }
2692     }
2693
2694     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2695     {
2696         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2697                 internal, width, height, 0, surface->resource.size, mem));
2698         checkGLcall("glCompressedTexImage2DARB");
2699     }
2700     else
2701     {
2702         gl_info->gl_ops.gl.p_glTexImage2D(surface->texture_target, surface->texture_level,
2703                 internal, width, height, 0, format->glFormat, format->glType, mem);
2704         checkGLcall("glTexImage2D");
2705     }
2706
2707     if (enable_client_storage)
2708     {
2709         gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2710         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2711     }
2712 }
2713
2714 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2715  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2716 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2717 /* Context activation is done by the caller. */
2718 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2719 {
2720     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2721     struct wined3d_renderbuffer_entry *entry;
2722     GLuint renderbuffer = 0;
2723     unsigned int src_width, src_height;
2724     unsigned int width, height;
2725
2726     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2727     {
2728         width = rt->pow2Width;
2729         height = rt->pow2Height;
2730     }
2731     else
2732     {
2733         width = surface->pow2Width;
2734         height = surface->pow2Height;
2735     }
2736
2737     src_width = surface->pow2Width;
2738     src_height = surface->pow2Height;
2739
2740     /* A depth stencil smaller than the render target is not valid */
2741     if (width > src_width || height > src_height) return;
2742
2743     /* Remove any renderbuffer set if the sizes match */
2744     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2745             || (width == src_width && height == src_height))
2746     {
2747         surface->current_renderbuffer = NULL;
2748         return;
2749     }
2750
2751     /* Look if we've already got a renderbuffer of the correct dimensions */
2752     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2753     {
2754         if (entry->width == width && entry->height == height)
2755         {
2756             renderbuffer = entry->id;
2757             surface->current_renderbuffer = entry;
2758             break;
2759         }
2760     }
2761
2762     if (!renderbuffer)
2763     {
2764         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2765         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2766         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2767                 surface->resource.format->glInternal, width, height);
2768
2769         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2770         entry->width = width;
2771         entry->height = height;
2772         entry->id = renderbuffer;
2773         list_add_head(&surface->renderbuffers, &entry->entry);
2774
2775         surface->current_renderbuffer = entry;
2776     }
2777
2778     checkGLcall("set_compatible_renderbuffer");
2779 }
2780
2781 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2782 {
2783     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2784
2785     TRACE("surface %p.\n", surface);
2786
2787     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2788     {
2789         ERR("Surface %p is not on a swapchain.\n", surface);
2790         return GL_NONE;
2791     }
2792
2793     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2794     {
2795         if (swapchain->render_to_fbo)
2796         {
2797             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2798             return GL_COLOR_ATTACHMENT0;
2799         }
2800         TRACE("Returning GL_BACK\n");
2801         return GL_BACK;
2802     }
2803     else if (surface == swapchain->front_buffer)
2804     {
2805         TRACE("Returning GL_FRONT\n");
2806         return GL_FRONT;
2807     }
2808
2809     FIXME("Higher back buffer, returning GL_BACK\n");
2810     return GL_BACK;
2811 }
2812
2813 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2814 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2815 {
2816     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2817
2818     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2819         /* No partial locking for textures yet. */
2820         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2821
2822     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2823     if (dirty_rect)
2824     {
2825         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2826         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2827         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2828         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2829     }
2830     else
2831     {
2832         surface->dirtyRect.left = 0;
2833         surface->dirtyRect.top = 0;
2834         surface->dirtyRect.right = surface->resource.width;
2835         surface->dirtyRect.bottom = surface->resource.height;
2836     }
2837
2838     /* if the container is a texture then mark it dirty. */
2839     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2840     {
2841         TRACE("Passing to container.\n");
2842         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2843     }
2844 }
2845
2846 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2847 {
2848     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2849     BOOL ck_changed;
2850
2851     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2852
2853     if (surface->resource.pool == WINED3D_POOL_SCRATCH)
2854     {
2855         ERR("Not supported on scratch surfaces.\n");
2856         return WINED3DERR_INVALIDCALL;
2857     }
2858
2859     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2860
2861     /* Reload if either the texture and sysmem have different ideas about the
2862      * color key, or the actual key values changed. */
2863     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2864             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2865             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2866     {
2867         TRACE("Reloading because of color keying\n");
2868         /* To perform the color key conversion we need a sysmem copy of
2869          * the surface. Make sure we have it. */
2870
2871         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2872         /* Make sure the texture is reloaded because of the color key change,
2873          * this kills performance though :( */
2874         /* TODO: This is not necessarily needed with hw palettized texture support. */
2875         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2876         /* Switching color keying on / off may change the internal format. */
2877         if (ck_changed)
2878             surface_force_reload(surface);
2879     }
2880     else if (!(surface->flags & flag))
2881     {
2882         TRACE("Reloading because surface is dirty.\n");
2883     }
2884     else
2885     {
2886         TRACE("surface is already in texture\n");
2887         return WINED3D_OK;
2888     }
2889
2890     /* No partial locking for textures yet. */
2891     surface_load_location(surface, flag, NULL);
2892     surface_evict_sysmem(surface);
2893
2894     return WINED3D_OK;
2895 }
2896
/* Convert a 32 bit float to its 16 bit (half precision) encoding.
 * See also float_16_to_32() in wined3d_private.h
 *
 * Zero (of either sign) is returned as 0x0000, NaN as 0x7c01 and infinity
 * as 0x7c00 / 0xfc00. Values too large for a half float become infinity,
 * values too small for a normalized half float become denormals or zero.
 * The mantissa is rounded to nearest, with ties going away from zero. */
static inline unsigned short float_32_to_16(const float *in)
{
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), so that its integer part is the
     * 11 bit mantissa including the implicit leading one. */
    while (tmp < 1024.0f)
    {
        tmp *= 2.0f;
        exp--;
    }
    while (tmp >= 2048.0f)
    {
        tmp /= 2.0f;
        exp++;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up can overflow the mantissa to 2^11. Carry that into the
     * exponent; masking with 0x3ff alone would silently halve the result. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2959
2960 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2961 {
2962     ULONG refcount;
2963
2964     TRACE("Surface %p, container %p of type %#x.\n",
2965             surface, surface->container.u.base, surface->container.type);
2966
2967     switch (surface->container.type)
2968     {
2969         case WINED3D_CONTAINER_TEXTURE:
2970             return wined3d_texture_incref(surface->container.u.texture);
2971
2972         case WINED3D_CONTAINER_SWAPCHAIN:
2973             return wined3d_swapchain_incref(surface->container.u.swapchain);
2974
2975         default:
2976             ERR("Unhandled container type %#x.\n", surface->container.type);
2977         case WINED3D_CONTAINER_NONE:
2978             break;
2979     }
2980
2981     refcount = InterlockedIncrement(&surface->resource.ref);
2982     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2983
2984     return refcount;
2985 }
2986
2987 /* Do not call while under the GL lock. */
2988 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2989 {
2990     ULONG refcount;
2991
2992     TRACE("Surface %p, container %p of type %#x.\n",
2993             surface, surface->container.u.base, surface->container.type);
2994
2995     switch (surface->container.type)
2996     {
2997         case WINED3D_CONTAINER_TEXTURE:
2998             return wined3d_texture_decref(surface->container.u.texture);
2999
3000         case WINED3D_CONTAINER_SWAPCHAIN:
3001             return wined3d_swapchain_decref(surface->container.u.swapchain);
3002
3003         default:
3004             ERR("Unhandled container type %#x.\n", surface->container.type);
3005         case WINED3D_CONTAINER_NONE:
3006             break;
3007     }
3008
3009     refcount = InterlockedDecrement(&surface->resource.ref);
3010     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
3011
3012     if (!refcount)
3013     {
3014         surface_cleanup(surface);
3015         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
3016
3017         TRACE("Destroyed surface %p.\n", surface);
3018         HeapFree(GetProcessHeap(), 0, surface);
3019     }
3020
3021     return refcount;
3022 }
3023
3024 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
3025 {
3026     return resource_set_priority(&surface->resource, priority);
3027 }
3028
3029 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
3030 {
3031     return resource_get_priority(&surface->resource);
3032 }
3033
3034 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
3035 {
3036     TRACE("surface %p.\n", surface);
3037
3038     if (!surface->resource.device->d3d_initialized)
3039     {
3040         ERR("D3D not initialized.\n");
3041         return;
3042     }
3043
3044     surface_internal_preload(surface, SRGB_ANY);
3045 }
3046
3047 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
3048 {
3049     TRACE("surface %p.\n", surface);
3050
3051     return surface->resource.parent;
3052 }
3053
3054 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
3055 {
3056     TRACE("surface %p.\n", surface);
3057
3058     return &surface->resource;
3059 }
3060
3061 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
3062 {
3063     TRACE("surface %p, flags %#x.\n", surface, flags);
3064
3065     switch (flags)
3066     {
3067         case WINEDDGBS_CANBLT:
3068         case WINEDDGBS_ISBLTDONE:
3069             return WINED3D_OK;
3070
3071         default:
3072             return WINED3DERR_INVALIDCALL;
3073     }
3074 }
3075
3076 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
3077 {
3078     TRACE("surface %p, flags %#x.\n", surface, flags);
3079
3080     /* XXX: DDERR_INVALIDSURFACETYPE */
3081
3082     switch (flags)
3083     {
3084         case WINEDDGFS_CANFLIP:
3085         case WINEDDGFS_ISFLIPDONE:
3086             return WINED3D_OK;
3087
3088         default:
3089             return WINED3DERR_INVALIDCALL;
3090     }
3091 }
3092
3093 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
3094 {
3095     TRACE("surface %p.\n", surface);
3096
3097     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
3098     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
3099 }
3100
3101 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
3102 {
3103     TRACE("surface %p.\n", surface);
3104
3105     surface->flags &= ~SFLAG_LOST;
3106     return WINED3D_OK;
3107 }
3108
3109 void CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
3110 {
3111     TRACE("surface %p, palette %p.\n", surface, palette);
3112
3113     if (surface->palette == palette)
3114     {
3115         TRACE("Nop palette change.\n");
3116         return;
3117     }
3118
3119     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3120         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3121
3122     surface->palette = palette;
3123
3124     if (palette)
3125     {
3126         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3127             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3128
3129         surface->surface_ops->surface_realize_palette(surface);
3130     }
3131 }
3132
3133 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3134         DWORD flags, const struct wined3d_color_key *color_key)
3135 {
3136     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3137
3138     if (flags & WINEDDCKEY_COLORSPACE)
3139     {
3140         FIXME(" colorkey value not supported (%08x) !\n", flags);
3141         return WINED3DERR_INVALIDCALL;
3142     }
3143
3144     /* Dirtify the surface, but only if a key was changed. */
3145     if (color_key)
3146     {
3147         switch (flags & ~WINEDDCKEY_COLORSPACE)
3148         {
3149             case WINEDDCKEY_DESTBLT:
3150                 surface->dst_blt_color_key = *color_key;
3151                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3152                 break;
3153
3154             case WINEDDCKEY_DESTOVERLAY:
3155                 surface->dst_overlay_color_key = *color_key;
3156                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3157                 break;
3158
3159             case WINEDDCKEY_SRCOVERLAY:
3160                 surface->src_overlay_color_key = *color_key;
3161                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3162                 break;
3163
3164             case WINEDDCKEY_SRCBLT:
3165                 surface->src_blt_color_key = *color_key;
3166                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3167                 break;
3168         }
3169     }
3170     else
3171     {
3172         switch (flags & ~WINEDDCKEY_COLORSPACE)
3173         {
3174             case WINEDDCKEY_DESTBLT:
3175                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3176                 break;
3177
3178             case WINEDDCKEY_DESTOVERLAY:
3179                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3180                 break;
3181
3182             case WINEDDCKEY_SRCOVERLAY:
3183                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3184                 break;
3185
3186             case WINEDDCKEY_SRCBLT:
3187                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3188                 break;
3189         }
3190     }
3191
3192     return WINED3D_OK;
3193 }
3194
3195 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3196 {
3197     TRACE("surface %p.\n", surface);
3198
3199     return surface->palette;
3200 }
3201
3202 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3203 {
3204     const struct wined3d_format *format = surface->resource.format;
3205     DWORD pitch;
3206
3207     TRACE("surface %p.\n", surface);
3208
3209     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3210     {
3211         /* Since compressed formats are block based, pitch means the amount of
3212          * bytes to the next row of block rather than the next row of pixels. */
3213         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3214         pitch = row_block_count * format->block_byte_count;
3215     }
3216     else
3217     {
3218         unsigned char alignment = surface->resource.device->surface_alignment;
3219         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3220         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3221     }
3222
3223     TRACE("Returning %u.\n", pitch);
3224
3225     return pitch;
3226 }
3227
3228 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3229 {
3230     TRACE("surface %p, mem %p.\n", surface, mem);
3231
3232     if (surface->resource.map_count || (surface->flags & SFLAG_DCINUSE))
3233     {
3234         WARN("Surface is mapped or the DC is in use.\n");
3235         return WINED3DERR_INVALIDCALL;
3236     }
3237
3238     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3239     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3240     {
3241         ERR("Not supported on render targets.\n");
3242         return WINED3DERR_INVALIDCALL;
3243     }
3244
3245     if (mem && mem != surface->resource.allocatedMemory)
3246     {
3247         void *release = NULL;
3248
3249         /* Do I have to copy the old surface content? */
3250         if (surface->flags & SFLAG_DIBSECTION)
3251         {
3252             DeleteDC(surface->hDC);
3253             DeleteObject(surface->dib.DIBsection);
3254             surface->dib.bitmap_data = NULL;
3255             surface->resource.allocatedMemory = NULL;
3256             surface->hDC = NULL;
3257             surface->flags &= ~SFLAG_DIBSECTION;
3258         }
3259         else if (!(surface->flags & SFLAG_USERPTR))
3260         {
3261             release = surface->resource.heapMemory;
3262             surface->resource.heapMemory = NULL;
3263         }
3264         surface->resource.allocatedMemory = mem;
3265         surface->flags |= SFLAG_USERPTR;
3266
3267         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3268         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3269
3270         /* For client textures OpenGL has to be notified. */
3271         if (surface->flags & SFLAG_CLIENT)
3272             surface_release_client_storage(surface);
3273
3274         /* Now free the old memory if any. */
3275         HeapFree(GetProcessHeap(), 0, release);
3276     }
3277     else if (surface->flags & SFLAG_USERPTR)
3278     {
3279         /* HeapMemory should be NULL already. */
3280         if (surface->resource.heapMemory)
3281             ERR("User pointer surface has heap memory allocated.\n");
3282
3283         if (!mem)
3284         {
3285             surface->resource.allocatedMemory = NULL;
3286             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3287
3288             if (surface->flags & SFLAG_CLIENT)
3289                 surface_release_client_storage(surface);
3290
3291             surface_prepare_system_memory(surface);
3292         }
3293
3294         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3295     }
3296
3297     return WINED3D_OK;
3298 }
3299
3300 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3301 {
3302     LONG w, h;
3303
3304     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3305
3306     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3307     {
3308         WARN("Not an overlay surface.\n");
3309         return WINEDDERR_NOTAOVERLAYSURFACE;
3310     }
3311
3312     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3313     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3314     surface->overlay_destrect.left = x;
3315     surface->overlay_destrect.top = y;
3316     surface->overlay_destrect.right = x + w;
3317     surface->overlay_destrect.bottom = y + h;
3318
3319     surface_draw_overlay(surface);
3320
3321     return WINED3D_OK;
3322 }
3323
3324 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3325 {
3326     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3327
3328     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3329     {
3330         TRACE("Not an overlay surface.\n");
3331         return WINEDDERR_NOTAOVERLAYSURFACE;
3332     }
3333
3334     if (!surface->overlay_dest)
3335     {
3336         TRACE("Overlay not visible.\n");
3337         *x = 0;
3338         *y = 0;
3339         return WINEDDERR_OVERLAYNOTVISIBLE;
3340     }
3341
3342     *x = surface->overlay_destrect.left;
3343     *y = surface->overlay_destrect.top;
3344
3345     TRACE("Returning position %d, %d.\n", *x, *y);
3346
3347     return WINED3D_OK;
3348 }
3349
3350 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3351         DWORD flags, struct wined3d_surface *ref)
3352 {
3353     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3354
3355     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3356     {
3357         TRACE("Not an overlay surface.\n");
3358         return WINEDDERR_NOTAOVERLAYSURFACE;
3359     }
3360
3361     return WINED3D_OK;
3362 }
3363
3364 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3365         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3366 {
3367     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3368             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3369
3370     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3371     {
3372         WARN("Not an overlay surface.\n");
3373         return WINEDDERR_NOTAOVERLAYSURFACE;
3374     }
3375     else if (!dst_surface)
3376     {
3377         WARN("Dest surface is NULL.\n");
3378         return WINED3DERR_INVALIDCALL;
3379     }
3380
3381     if (src_rect)
3382     {
3383         surface->overlay_srcrect = *src_rect;
3384     }
3385     else
3386     {
3387         surface->overlay_srcrect.left = 0;
3388         surface->overlay_srcrect.top = 0;
3389         surface->overlay_srcrect.right = surface->resource.width;
3390         surface->overlay_srcrect.bottom = surface->resource.height;
3391     }
3392
3393     if (dst_rect)
3394     {
3395         surface->overlay_destrect = *dst_rect;
3396     }
3397     else
3398     {
3399         surface->overlay_destrect.left = 0;
3400         surface->overlay_destrect.top = 0;
3401         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3402         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3403     }
3404
3405     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3406     {
3407         surface->overlay_dest = NULL;
3408         list_remove(&surface->overlay_entry);
3409     }
3410
3411     if (flags & WINEDDOVER_SHOW)
3412     {
3413         if (surface->overlay_dest != dst_surface)
3414         {
3415             surface->overlay_dest = dst_surface;
3416             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3417         }
3418     }
3419     else if (flags & WINEDDOVER_HIDE)
3420     {
3421         /* tests show that the rectangles are erased on hide */
3422         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3423         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3424         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3425         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3426         surface->overlay_dest = NULL;
3427     }
3428
3429     surface_draw_overlay(surface);
3430
3431     return WINED3D_OK;
3432 }
3433
3434 HRESULT CDECL wined3d_surface_update_desc(struct wined3d_surface *surface,
3435         UINT width, UINT height, enum wined3d_format_id format_id,
3436         enum wined3d_multisample_type multisample_type, UINT multisample_quality)
3437 {
3438     struct wined3d_device *device = surface->resource.device;
3439     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3440     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
3441     UINT resource_size = wined3d_format_calculate_size(format, device->surface_alignment, width, height);
3442
3443     TRACE("surface %p, width %u, height %u, format %s, multisample_type %#x, multisample_quality %u.\n",
3444             surface, width, height, debug_d3dformat(format_id), multisample_type, multisample_type);
3445
3446     if (!resource_size)
3447         return WINED3DERR_INVALIDCALL;
3448
3449     if (device->d3d_initialized)
3450         surface->resource.resource_ops->resource_unload(&surface->resource);
3451
3452     if (surface->flags & SFLAG_DIBSECTION)
3453     {
3454         DeleteDC(surface->hDC);
3455         DeleteObject(surface->dib.DIBsection);
3456         surface->dib.bitmap_data = NULL;
3457         surface->flags &= ~SFLAG_DIBSECTION;
3458     }
3459
3460     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_USERPTR);
3461     surface->resource.allocatedMemory = NULL;
3462     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3463     surface->resource.heapMemory = NULL;
3464
3465     surface->resource.width = width;
3466     surface->resource.height = height;
3467     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[ARB_TEXTURE_RECTANGLE]
3468             || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
3469     {
3470         surface->pow2Width = width;
3471         surface->pow2Height = height;
3472     }
3473     else
3474     {
3475         surface->pow2Width = surface->pow2Height = 1;
3476         while (surface->pow2Width < width)
3477             surface->pow2Width <<= 1;
3478         while (surface->pow2Height < height)
3479             surface->pow2Height <<= 1;
3480     }
3481
3482     if (surface->pow2Width != width || surface->pow2Height != height)
3483         surface->flags |= SFLAG_NONPOW2;
3484     else
3485         surface->flags &= ~SFLAG_NONPOW2;
3486
3487     surface->resource.format = format;
3488     surface->resource.multisample_type = multisample_type;
3489     surface->resource.multisample_quality = multisample_quality;
3490     surface->resource.size = resource_size;
3491
3492     if (!surface_init_sysmem(surface))
3493         return E_OUTOFMEMORY;
3494
3495     return WINED3D_OK;
3496 }
3497
3498 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3499         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3500 {
3501     unsigned short *dst_s;
3502     const float *src_f;
3503     unsigned int x, y;
3504
3505     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3506
3507     for (y = 0; y < h; ++y)
3508     {
3509         src_f = (const float *)(src + y * pitch_in);
3510         dst_s = (unsigned short *) (dst + y * pitch_out);
3511         for (x = 0; x < w; ++x)
3512         {
3513             dst_s[x] = float_32_to_16(src_f + x);
3514         }
3515     }
3516 }
3517
3518 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3519         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3520 {
3521     static const unsigned char convert_5to8[] =
3522     {
3523         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3524         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3525         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3526         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3527     };
3528     static const unsigned char convert_6to8[] =
3529     {
3530         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3531         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3532         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3533         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3534         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3535         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3536         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3537         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3538     };
3539     unsigned int x, y;
3540
3541     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3542
3543     for (y = 0; y < h; ++y)
3544     {
3545         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3546         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3547         for (x = 0; x < w; ++x)
3548         {
3549             WORD pixel = src_line[x];
3550             dst_line[x] = 0xff000000
3551                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3552                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3553                     | convert_5to8[(pixel & 0x001f)];
3554         }
3555     }
3556 }
3557
3558 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3559  * in both cases we're just setting the X / Alpha channel to 0xff. */
3560 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3561         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3562 {
3563     unsigned int x, y;
3564
3565     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3566
3567     for (y = 0; y < h; ++y)
3568     {
3569         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3570         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3571
3572         for (x = 0; x < w; ++x)
3573         {
3574             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3575         }
3576     }
3577 }
3578
3579 static inline BYTE cliptobyte(int x)
3580 {
3581     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3582 }
3583
3584 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3585         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3586 {
3587     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3588     unsigned int x, y;
3589
3590     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3591
3592     for (y = 0; y < h; ++y)
3593     {
3594         const BYTE *src_line = src + y * pitch_in;
3595         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3596         for (x = 0; x < w; ++x)
3597         {
3598             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3599              *     C = Y - 16; D = U - 128; E = V - 128;
3600              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3601              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3602              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3603              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3604              * U and V are shared between the pixels. */
3605             if (!(x & 1)) /* For every even pixel, read new U and V. */
3606             {
3607                 d = (int) src_line[1] - 128;
3608                 e = (int) src_line[3] - 128;
3609                 r2 = 409 * e + 128;
3610                 g2 = - 100 * d - 208 * e + 128;
3611                 b2 = 516 * d + 128;
3612             }
3613             c2 = 298 * ((int) src_line[0] - 16);
3614             dst_line[x] = 0xff000000
3615                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3616                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3617                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3618                 /* Scale RGB values to 0..255 range,
3619                  * then clip them if still not in range (may be negative),
3620                  * then shift them within DWORD if necessary. */
3621             src_line += 2;
3622         }
3623     }
3624 }
3625
3626 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3627         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3628 {
3629     unsigned int x, y;
3630     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3631
3632     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3633
3634     for (y = 0; y < h; ++y)
3635     {
3636         const BYTE *src_line = src + y * pitch_in;
3637         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3638         for (x = 0; x < w; ++x)
3639         {
3640             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3641              *     C = Y - 16; D = U - 128; E = V - 128;
3642              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3643              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3644              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3645              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3646              * U and V are shared between the pixels. */
3647             if (!(x & 1)) /* For every even pixel, read new U and V. */
3648             {
3649                 d = (int) src_line[1] - 128;
3650                 e = (int) src_line[3] - 128;
3651                 r2 = 409 * e + 128;
3652                 g2 = - 100 * d - 208 * e + 128;
3653                 b2 = 516 * d + 128;
3654             }
3655             c2 = 298 * ((int) src_line[0] - 16);
3656             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3657                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3658                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3659                 /* Scale RGB values to 0..255 range,
3660                  * then clip them if still not in range (may be negative),
3661                  * then shift them within DWORD if necessary. */
3662             src_line += 2;
3663         }
3664     }
3665 }
3666
3667 struct d3dfmt_converter_desc
3668 {
3669     enum wined3d_format_id from, to;
3670     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3671 };
3672
/* Supported CPU format conversions, searched linearly by find_converter().
 * Each row reads {source format, destination format, conversion routine}.
 * The two B8G8R8A8 / B8G8R8X8 rows share one routine. */
static const struct d3dfmt_converter_desc converters[] =
{
    {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
    {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
    {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
};
3682
3683 static inline const struct d3dfmt_converter_desc *find_converter(enum wined3d_format_id from,
3684         enum wined3d_format_id to)
3685 {
3686     unsigned int i;
3687
3688     for (i = 0; i < (sizeof(converters) / sizeof(*converters)); ++i)
3689     {
3690         if (converters[i].from == from && converters[i].to == to)
3691             return &converters[i];
3692     }
3693
3694     return NULL;
3695 }
3696
3697 /*****************************************************************************
3698  * surface_convert_format
3699  *
3700  * Creates a duplicate of a surface in a different format. Is used by Blt to
3701  * blit between surfaces with different formats.
3702  *
3703  * Parameters
3704  *  source: Source surface
3705  *  fmt: Requested destination format
3706  *
3707  *****************************************************************************/
3708 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3709 {
3710     struct wined3d_map_desc src_map, dst_map;
3711     const struct d3dfmt_converter_desc *conv;
3712     struct wined3d_surface *ret = NULL;
3713     HRESULT hr;
3714
3715     conv = find_converter(source->resource.format->id, to_fmt);
3716     if (!conv)
3717     {
3718         FIXME("Cannot find a conversion function from format %s to %s.\n",
3719                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3720         return NULL;
3721     }
3722
3723     /* FIXME: Multisampled conversion? */
3724     if (FAILED(hr = wined3d_surface_create(source->resource.device, source->resource.width, source->resource.height,
3725             to_fmt, 0, WINED3D_POOL_SCRATCH, WINED3D_MULTISAMPLE_NONE, 0, source->surface_type,
3726             WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD, NULL, &wined3d_null_parent_ops, &ret)))
3727     {
3728         ERR("Failed to create a destination surface for conversion.\n");
3729         return NULL;
3730     }
3731
3732     memset(&src_map, 0, sizeof(src_map));
3733     memset(&dst_map, 0, sizeof(dst_map));
3734
3735     if (FAILED(hr = wined3d_surface_map(source, &src_map, NULL, WINED3D_MAP_READONLY)))
3736     {
3737         ERR("Failed to lock the source surface.\n");
3738         wined3d_surface_decref(ret);
3739         return NULL;
3740     }
3741     if (FAILED(hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3D_MAP_READONLY)))
3742     {
3743         ERR("Failed to lock the destination surface.\n");
3744         wined3d_surface_unmap(source);
3745         wined3d_surface_decref(ret);
3746         return NULL;
3747     }
3748
3749     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3750             source->resource.width, source->resource.height);
3751
3752     wined3d_surface_unmap(ret);
3753     wined3d_surface_unmap(source);
3754
3755     return ret;
3756 }
3757
3758 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3759         unsigned int bpp, UINT pitch, DWORD color)
3760 {
3761     BYTE *first;
3762     int x, y;
3763
3764     /* Do first row */
3765
3766 #define COLORFILL_ROW(type) \
3767 do { \
3768     type *d = (type *)buf; \
3769     for (x = 0; x < width; ++x) \
3770         d[x] = (type)color; \
3771 } while(0)
3772
3773     switch (bpp)
3774     {
3775         case 1:
3776             COLORFILL_ROW(BYTE);
3777             break;
3778
3779         case 2:
3780             COLORFILL_ROW(WORD);
3781             break;
3782
3783         case 3:
3784         {
3785             BYTE *d = buf;
3786             for (x = 0; x < width; ++x, d += 3)
3787             {
3788                 d[0] = (color      ) & 0xff;
3789                 d[1] = (color >>  8) & 0xff;
3790                 d[2] = (color >> 16) & 0xff;
3791             }
3792             break;
3793         }
3794         case 4:
3795             COLORFILL_ROW(DWORD);
3796             break;
3797
3798         default:
3799             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3800             return WINED3DERR_NOTAVAILABLE;
3801     }
3802
3803 #undef COLORFILL_ROW
3804
3805     /* Now copy first row. */
3806     first = buf;
3807     for (y = 1; y < height; ++y)
3808     {
3809         buf += pitch;
3810         memcpy(buf, first, width * bpp);
3811     }
3812
3813     return WINED3D_OK;
3814 }
3815
3816 struct wined3d_surface * CDECL wined3d_surface_from_resource(struct wined3d_resource *resource)
3817 {
3818     return surface_from_resource(resource);
3819 }
3820
3821 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3822 {
3823     TRACE("surface %p.\n", surface);
3824
3825     if (!surface->resource.map_count)
3826     {
3827         WARN("Trying to unmap unmapped surface.\n");
3828         return WINEDDERR_NOTLOCKED;
3829     }
3830     --surface->resource.map_count;
3831
3832     surface->surface_ops->surface_unmap(surface);
3833
3834     return WINED3D_OK;
3835 }
3836
3837 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3838         struct wined3d_map_desc *map_desc, const RECT *rect, DWORD flags)
3839 {
3840     const struct wined3d_format *format = surface->resource.format;
3841
3842     TRACE("surface %p, map_desc %p, rect %s, flags %#x.\n",
3843             surface, map_desc, wine_dbgstr_rect(rect), flags);
3844
3845     if (surface->resource.map_count)
3846     {
3847         WARN("Surface is already mapped.\n");
3848         return WINED3DERR_INVALIDCALL;
3849     }
3850
3851     if ((format->flags & WINED3DFMT_FLAG_BLOCKS) && rect
3852             && !surface_check_block_align(surface, rect))
3853     {
3854         WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3855                 wine_dbgstr_rect(rect), format->block_width, format->block_height);
3856
3857         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3858             return WINED3DERR_INVALIDCALL;
3859     }
3860
3861     ++surface->resource.map_count;
3862
3863     if (!(surface->flags & SFLAG_LOCKABLE))
3864         WARN("Trying to lock unlockable surface.\n");
3865
3866     /* Performance optimization: Count how often a surface is mapped, if it is
3867      * mapped regularly do not throw away the system memory copy. This avoids
3868      * the need to download the surface from OpenGL all the time. The surface
3869      * is still downloaded if the OpenGL texture is changed. */
3870     if (!(surface->flags & SFLAG_DYNLOCK))
3871     {
3872         if (++surface->lockCount > MAXLOCKCOUNT)
3873         {
3874             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3875             surface->flags |= SFLAG_DYNLOCK;
3876         }
3877     }
3878
3879     surface->surface_ops->surface_map(surface, rect, flags);
3880
3881     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3882         map_desc->row_pitch = surface->resource.width * format->byte_count;
3883     else
3884         map_desc->row_pitch = wined3d_surface_get_pitch(surface);
3885     map_desc->slice_pitch = 0;
3886
3887     if (!rect)
3888     {
3889         map_desc->data = surface->resource.allocatedMemory;
3890         surface->lockedRect.left = 0;
3891         surface->lockedRect.top = 0;
3892         surface->lockedRect.right = surface->resource.width;
3893         surface->lockedRect.bottom = surface->resource.height;
3894     }
3895     else
3896     {
3897         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3898         {
3899             /* Compressed textures are block based, so calculate the offset of
3900              * the block that contains the top-left pixel of the locked rectangle. */
3901             map_desc->data = surface->resource.allocatedMemory
3902                     + ((rect->top / format->block_height) * map_desc->row_pitch)
3903                     + ((rect->left / format->block_width) * format->block_byte_count);
3904         }
3905         else
3906         {
3907             map_desc->data = surface->resource.allocatedMemory
3908                     + (map_desc->row_pitch * rect->top)
3909                     + (rect->left * format->byte_count);
3910         }
3911         surface->lockedRect.left = rect->left;
3912         surface->lockedRect.top = rect->top;
3913         surface->lockedRect.right = rect->right;
3914         surface->lockedRect.bottom = rect->bottom;
3915     }
3916
3917     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3918     TRACE("Returning memory %p, pitch %u.\n", map_desc->data, map_desc->row_pitch);
3919
3920     return WINED3D_OK;
3921 }
3922
3923 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3924 {
3925     struct wined3d_map_desc map;
3926     HRESULT hr;
3927
3928     TRACE("surface %p, dc %p.\n", surface, dc);
3929
3930     if (surface->flags & SFLAG_USERPTR)
3931     {
3932         ERR("Not supported on surfaces with application-provided memory.\n");
3933         return WINEDDERR_NODC;
3934     }
3935
3936     /* Give more detailed info for ddraw. */
3937     if (surface->flags & SFLAG_DCINUSE)
3938         return WINEDDERR_DCALREADYCREATED;
3939
3940     /* Can't GetDC if the surface is locked. */
3941     if (surface->resource.map_count)
3942         return WINED3DERR_INVALIDCALL;
3943
3944     /* Create a DIB section if there isn't a dc yet. */
3945     if (!surface->hDC)
3946     {
3947         if (surface->flags & SFLAG_CLIENT)
3948         {
3949             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3950             surface_release_client_storage(surface);
3951         }
3952         hr = surface_create_dib_section(surface);
3953         if (FAILED(hr))
3954             return WINED3DERR_INVALIDCALL;
3955
3956         /* Use the DIB section from now on if we are not using a PBO. */
3957         if (!(surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)))
3958         {
3959             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3960             surface->resource.heapMemory = NULL;
3961             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3962         }
3963     }
3964
3965     /* Map the surface. */
3966     hr = wined3d_surface_map(surface, &map, NULL, 0);
3967     if (FAILED(hr))
3968     {
3969         ERR("Map failed, hr %#x.\n", hr);
3970         return hr;
3971     }
3972
3973     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3974      * activates the allocatedMemory. */
3975     if (surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM))
3976         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3977
3978     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3979             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3980     {
3981         /* GetDC on palettized formats is unsupported in D3D9, and the method
3982          * is missing in D3D8, so this should only be used for DX <=7
3983          * surfaces (with non-device palettes). */
3984         const PALETTEENTRY *pal = NULL;
3985
3986         if (surface->palette)
3987         {
3988             pal = surface->palette->palents;
3989         }
3990         else
3991         {
3992             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3993             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3994
3995             if (dds_primary && dds_primary->palette)
3996                 pal = dds_primary->palette->palents;
3997         }
3998
3999         if (pal)
4000         {
4001             RGBQUAD col[256];
4002             unsigned int i;
4003
4004             for (i = 0; i < 256; ++i)
4005             {
4006                 col[i].rgbRed = pal[i].peRed;
4007                 col[i].rgbGreen = pal[i].peGreen;
4008                 col[i].rgbBlue = pal[i].peBlue;
4009                 col[i].rgbReserved = 0;
4010             }
4011             SetDIBColorTable(surface->hDC, 0, 256, col);
4012         }
4013     }
4014
4015     surface->flags |= SFLAG_DCINUSE;
4016
4017     *dc = surface->hDC;
4018     TRACE("Returning dc %p.\n", *dc);
4019
4020     return WINED3D_OK;
4021 }
4022
4023 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
4024 {
4025     TRACE("surface %p, dc %p.\n", surface, dc);
4026
4027     if (!(surface->flags & SFLAG_DCINUSE))
4028         return WINEDDERR_NODC;
4029
4030     if (surface->hDC != dc)
4031     {
4032         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
4033                 dc, surface->hDC);
4034         return WINEDDERR_NODC;
4035     }
4036
4037     /* Copy the contents of the DIB over to the PBO. */
4038     if ((surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)) && surface->resource.allocatedMemory)
4039         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
4040
4041     /* We locked first, so unlock now. */
4042     wined3d_surface_unmap(surface);
4043
4044     surface->flags &= ~SFLAG_DCINUSE;
4045
4046     return WINED3D_OK;
4047 }
4048
4049 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
4050 {
4051     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
4052
4053     if (flags)
4054     {
4055         static UINT once;
4056         if (!once++)
4057             FIXME("Ignoring flags %#x.\n", flags);
4058         else
4059             WARN("Ignoring flags %#x.\n", flags);
4060     }
4061
4062     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4063     {
4064         ERR("Not supported on swapchain surfaces.\n");
4065         return WINEDDERR_NOTFLIPPABLE;
4066     }
4067
4068     /* Flipping is only supported on render targets and overlays. */
4069     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
4070     {
4071         WARN("Tried to flip a non-render target, non-overlay surface.\n");
4072         return WINEDDERR_NOTFLIPPABLE;
4073     }
4074
4075     flip_surface(surface, override);
4076
4077     /* Update overlays if they're visible. */
4078     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
4079         return surface_draw_overlay(surface);
4080
4081     return WINED3D_OK;
4082 }
4083
4084 /* Do not call while under the GL lock. */
4085 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
4086 {
4087     struct wined3d_device *device = surface->resource.device;
4088
4089     TRACE("iface %p, srgb %#x.\n", surface, srgb);
4090
4091     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4092     {
4093         struct wined3d_texture *texture = surface->container.u.texture;
4094
4095         TRACE("Passing to container (%p).\n", texture);
4096         texture->texture_ops->texture_preload(texture, srgb);
4097     }
4098     else
4099     {
4100         struct wined3d_context *context;
4101
4102         TRACE("(%p) : About to load surface\n", surface);
4103
4104         /* TODO: Use already acquired context when possible. */
4105         context = context_acquire(device, NULL);
4106
4107         surface_load(surface, srgb == SRGB_SRGB);
4108
4109         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
4110         {
4111             /* Tell opengl to try and keep this texture in video ram (well mostly) */
4112             GLclampf tmp;
4113             tmp = 0.9f;
4114             context->gl_info->gl_ops.gl.p_glPrioritizeTextures(1, &surface->texture_name, &tmp);
4115         }
4116
4117         context_release(context);
4118     }
4119 }
4120
4121 /* Read the framebuffer back into the surface */
4122 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4123 {
4124     struct wined3d_device *device = surface->resource.device;
4125     const struct wined3d_gl_info *gl_info;
4126     struct wined3d_context *context;
4127     BYTE *mem;
4128     GLint fmt;
4129     GLint type;
4130     BYTE *row, *top, *bottom;
4131     int i;
4132     BOOL bpp;
4133     RECT local_rect;
4134     BOOL srcIsUpsideDown;
4135     GLint rowLen = 0;
4136     GLint skipPix = 0;
4137     GLint skipRow = 0;
4138
4139     context = context_acquire(device, surface);
4140     context_apply_blit_state(context, device);
4141     gl_info = context->gl_info;
4142
4143     /* Select the correct read buffer, and give some debug output.
4144      * There is no need to keep track of the current read buffer or reset it, every part of the code
4145      * that reads sets the read buffer as desired.
4146      */
4147     if (surface_is_offscreen(surface))
4148     {
4149         /* Mapping the primary render target which is not on a swapchain.
4150          * Read from the back buffer. */
4151         TRACE("Mapping offscreen render target.\n");
4152         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
4153         srcIsUpsideDown = TRUE;
4154     }
4155     else
4156     {
4157         /* Onscreen surfaces are always part of a swapchain */
4158         GLenum buffer = surface_get_gl_buffer(surface);
4159         TRACE("Mapping %#x buffer.\n", buffer);
4160         gl_info->gl_ops.gl.p_glReadBuffer(buffer);
4161         checkGLcall("glReadBuffer");
4162         srcIsUpsideDown = FALSE;
4163     }
4164
4165     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4166     if (!rect)
4167     {
4168         local_rect.left = 0;
4169         local_rect.top = 0;
4170         local_rect.right = surface->resource.width;
4171         local_rect.bottom = surface->resource.height;
4172     }
4173     else
4174     {
4175         local_rect = *rect;
4176     }
4177     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4178
4179     switch (surface->resource.format->id)
4180     {
4181         case WINED3DFMT_P8_UINT:
4182         {
4183             if (primary_render_target_is_p8(device))
4184             {
4185                 /* In case of P8 render targets the index is stored in the alpha component */
4186                 fmt = GL_ALPHA;
4187                 type = GL_UNSIGNED_BYTE;
4188                 mem = dest;
4189                 bpp = surface->resource.format->byte_count;
4190             }
4191             else
4192             {
4193                 /* GL can't return palettized data, so read ARGB pixels into a
4194                  * separate block of memory and convert them into palettized format
4195                  * in software. Slow, but if the app means to use palettized render
4196                  * targets and locks it...
4197                  *
4198                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4199                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4200                  * for the color channels when palettizing the colors.
4201                  */
4202                 fmt = GL_RGB;
4203                 type = GL_UNSIGNED_BYTE;
4204                 pitch *= 3;
4205                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4206                 if (!mem)
4207                 {
4208                     ERR("Out of memory\n");
4209                     return;
4210                 }
4211                 bpp = surface->resource.format->byte_count * 3;
4212             }
4213         }
4214         break;
4215
4216         default:
4217             mem = dest;
4218             fmt = surface->resource.format->glFormat;
4219             type = surface->resource.format->glType;
4220             bpp = surface->resource.format->byte_count;
4221     }
4222
4223     if (surface->flags & SFLAG_PBO)
4224     {
4225         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4226         checkGLcall("glBindBufferARB");
4227         if (mem)
4228         {
4229             ERR("mem not null for pbo -- unexpected\n");
4230             mem = NULL;
4231         }
4232     }
4233
4234     /* Save old pixel store pack state */
4235     gl_info->gl_ops.gl.p_glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4236     checkGLcall("glGetIntegerv");
4237     gl_info->gl_ops.gl.p_glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4238     checkGLcall("glGetIntegerv");
4239     gl_info->gl_ops.gl.p_glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4240     checkGLcall("glGetIntegerv");
4241
4242     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4243     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4244     checkGLcall("glPixelStorei");
4245     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4246     checkGLcall("glPixelStorei");
4247     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4248     checkGLcall("glPixelStorei");
4249
4250     gl_info->gl_ops.gl.p_glReadPixels(local_rect.left,
4251             !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4252             local_rect.right - local_rect.left,
4253             local_rect.bottom - local_rect.top,
4254             fmt, type, mem);
4255     checkGLcall("glReadPixels");
4256
4257     /* Reset previous pixel store pack state */
4258     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4259     checkGLcall("glPixelStorei");
4260     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4261     checkGLcall("glPixelStorei");
4262     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4263     checkGLcall("glPixelStorei");
4264
4265     if (surface->flags & SFLAG_PBO)
4266     {
4267         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4268         checkGLcall("glBindBufferARB");
4269
4270         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4271          * to get a pointer to it and perform the flipping in software. This is a lot
4272          * faster than calling glReadPixels for each line. In case we want more speed
4273          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4274         if (!srcIsUpsideDown)
4275         {
4276             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4277             checkGLcall("glBindBufferARB");
4278
4279             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4280             checkGLcall("glMapBufferARB");
4281         }
4282     }
4283
4284     /* TODO: Merge this with the palettization loop below for P8 targets */
4285     if(!srcIsUpsideDown) {
4286         UINT len, off;
4287         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4288             Flip the lines in software */
4289         len = (local_rect.right - local_rect.left) * bpp;
4290         off = local_rect.left * bpp;
4291
4292         row = HeapAlloc(GetProcessHeap(), 0, len);
4293         if(!row) {
4294             ERR("Out of memory\n");
4295             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4296                 HeapFree(GetProcessHeap(), 0, mem);
4297             return;
4298         }
4299
4300         top = mem + pitch * local_rect.top;
4301         bottom = mem + pitch * (local_rect.bottom - 1);
4302         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4303             memcpy(row, top + off, len);
4304             memcpy(top + off, bottom + off, len);
4305             memcpy(bottom + off, row, len);
4306             top += pitch;
4307             bottom -= pitch;
4308         }
4309         HeapFree(GetProcessHeap(), 0, row);
4310
4311         /* Unmap the temp PBO buffer */
4312         if (surface->flags & SFLAG_PBO)
4313         {
4314             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4315             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4316         }
4317     }
4318
4319     context_release(context);
4320
4321     /* For P8 textures we need to perform an inverse palette lookup. This is
4322      * done by searching for a palette index which matches the RGB value.
4323      * Note this isn't guaranteed to work when there are multiple entries for
4324      * the same color but we have no choice. In case of P8 render targets,
4325      * the index is stored in the alpha component so no conversion is needed. */
4326     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4327     {
4328         const PALETTEENTRY *pal = NULL;
4329         DWORD width = pitch / 3;
4330         int x, y, c;
4331
4332         if (surface->palette)
4333         {
4334             pal = surface->palette->palents;
4335         }
4336         else
4337         {
4338             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4339             HeapFree(GetProcessHeap(), 0, mem);
4340             return;
4341         }
4342
4343         for(y = local_rect.top; y < local_rect.bottom; y++) {
4344             for(x = local_rect.left; x < local_rect.right; x++) {
4345                 /*                      start              lines            pixels      */
4346                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4347                 const BYTE *green = blue  + 1;
4348                 const BYTE *red = green + 1;
4349
4350                 for(c = 0; c < 256; c++) {
4351                     if(*red   == pal[c].peRed   &&
4352                        *green == pal[c].peGreen &&
4353                        *blue  == pal[c].peBlue)
4354                     {
4355                         *((BYTE *) dest + y * width + x) = c;
4356                         break;
4357                     }
4358                 }
4359             }
4360         }
4361         HeapFree(GetProcessHeap(), 0, mem);
4362     }
4363 }
4364
4365 /* Read the framebuffer contents into a texture. Note that this function
4366  * doesn't do any kind of flipping. Using this on an onscreen surface will
4367  * result in a flipped D3D texture. */
4368 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4369 {
4370     struct wined3d_device *device = surface->resource.device;
4371     const struct wined3d_gl_info *gl_info;
4372     struct wined3d_context *context;
4373
4374     context = context_acquire(device, surface);
4375     gl_info = context->gl_info;
4376     device_invalidate_state(device, STATE_FRAMEBUFFER);
4377
4378     surface_prepare_texture(surface, context, srgb);
4379     surface_bind_and_dirtify(surface, context, srgb);
4380
4381     TRACE("Reading back offscreen render target %p.\n", surface);
4382
4383     if (surface_is_offscreen(surface))
4384         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
4385     else
4386         gl_info->gl_ops.gl.p_glReadBuffer(surface_get_gl_buffer(surface));
4387     checkGLcall("glReadBuffer");
4388
4389     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4390             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4391     checkGLcall("glCopyTexSubImage2D");
4392
4393     context_release(context);
4394 }
4395
4396 /* Context activation is done by the caller. */
4397 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4398         struct wined3d_context *context, BOOL srgb)
4399 {
4400     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4401     enum wined3d_conversion_type convert;
4402     struct wined3d_format format;
4403
4404     if (surface->flags & alloc_flag) return;
4405
4406     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4407     if (convert != WINED3D_CT_NONE || format.convert)
4408         surface->flags |= SFLAG_CONVERTED;
4409     else surface->flags &= ~SFLAG_CONVERTED;
4410
4411     surface_bind_and_dirtify(surface, context, srgb);
4412     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4413     surface->flags |= alloc_flag;
4414 }
4415
4416 /* Context activation is done by the caller. */
4417 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4418 {
4419     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4420     {
4421         struct wined3d_texture *texture = surface->container.u.texture;
4422         UINT sub_count = texture->level_count * texture->layer_count;
4423         UINT i;
4424
4425         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4426
4427         for (i = 0; i < sub_count; ++i)
4428         {
4429             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4430             surface_prepare_texture_internal(s, context, srgb);
4431         }
4432
4433         return;
4434     }
4435
4436     surface_prepare_texture_internal(surface, context, srgb);
4437 }
4438
4439 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4440 {
4441     if (multisample)
4442     {
4443         if (surface->rb_multisample)
4444             return;
4445
4446         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4447         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4448         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4449                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4450         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4451     }
4452     else
4453     {
4454         if (surface->rb_resolved)
4455             return;
4456
4457         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4458         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4459         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4460                 surface->pow2Width, surface->pow2Height);
4461         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4462     }
4463 }
4464
4465 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4466         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4467 {
4468     struct wined3d_device *device = surface->resource.device;
4469     UINT pitch = wined3d_surface_get_pitch(surface);
4470     const struct wined3d_gl_info *gl_info;
4471     struct wined3d_context *context;
4472     RECT local_rect;
4473     UINT w, h;
4474
4475     surface_get_rect(surface, rect, &local_rect);
4476
4477     mem += local_rect.top * pitch + local_rect.left * bpp;
4478     w = local_rect.right - local_rect.left;
4479     h = local_rect.bottom - local_rect.top;
4480
4481     /* Activate the correct context for the render target */
4482     context = context_acquire(device, surface);
4483     context_apply_blit_state(context, device);
4484     gl_info = context->gl_info;
4485
4486     if (!surface_is_offscreen(surface))
4487     {
4488         GLenum buffer = surface_get_gl_buffer(surface);
4489         TRACE("Unlocking %#x buffer.\n", buffer);
4490         context_set_draw_buffer(context, buffer);
4491
4492         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4493         gl_info->gl_ops.gl.p_glPixelZoom(1.0f, -1.0f);
4494     }
4495     else
4496     {
4497         /* Primary offscreen render target */
4498         TRACE("Offscreen render target.\n");
4499         context_set_draw_buffer(context, device->offscreenBuffer);
4500
4501         gl_info->gl_ops.gl.p_glPixelZoom(1.0f, 1.0f);
4502     }
4503
4504     gl_info->gl_ops.gl.p_glRasterPos3i(local_rect.left, local_rect.top, 1);
4505     checkGLcall("glRasterPos3i");
4506
4507     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4508     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4509
4510     if (surface->flags & SFLAG_PBO)
4511     {
4512         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4513         checkGLcall("glBindBufferARB");
4514     }
4515
4516     gl_info->gl_ops.gl.p_glDrawPixels(w, h, fmt, type, mem);
4517     checkGLcall("glDrawPixels");
4518
4519     if (surface->flags & SFLAG_PBO)
4520     {
4521         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4522         checkGLcall("glBindBufferARB");
4523     }
4524
4525     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4526     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4527
4528     if (wined3d_settings.strict_draw_ordering
4529             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4530             && surface->container.u.swapchain->front_buffer == surface))
4531         gl_info->gl_ops.gl.p_glFlush();
4532
4533     context_release(context);
4534 }
4535
4536 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4537 {
4538     /* FIXME: Is this really how color keys are supposed to work? I think it
4539      * makes more sense to compare the individual channels. */
4540     return color >= color_key->color_space_low_value
4541             && color <= color_key->color_space_high_value;
4542 }
4543
4544 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4545 {
4546     const struct wined3d_device *device = surface->resource.device;
4547     const struct wined3d_palette *pal = surface->palette;
4548     BOOL index_in_alpha = FALSE;
4549     unsigned int i;
4550
4551     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4552      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4553      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4554      * duplicate entries. Store the color key in the unused alpha component to speed the
4555      * download up and to make conversion unneeded. */
4556     index_in_alpha = primary_render_target_is_p8(device);
4557
4558     if (!pal)
4559     {
4560         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4561         if (index_in_alpha)
4562         {
4563             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4564              * there's no palette at this time. */
4565             for (i = 0; i < 256; i++) table[i][3] = i;
4566         }
4567     }
4568     else
4569     {
4570         TRACE("Using surface palette %p\n", pal);
4571         /* Get the surface's palette */
4572         for (i = 0; i < 256; ++i)
4573         {
4574             table[i][0] = pal->palents[i].peRed;
4575             table[i][1] = pal->palents[i].peGreen;
4576             table[i][2] = pal->palents[i].peBlue;
4577
4578             /* When index_in_alpha is set the palette index is stored in the
4579              * alpha component. In case of a readback we can then read
4580              * GL_ALPHA. Color keying is handled in BltOverride using a
4581              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4582              * color key itself is passed to glAlphaFunc in other cases the
4583              * alpha component of pixels that should be masked away is set to 0. */
4584             if (index_in_alpha)
4585                 table[i][3] = i;
4586             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4587                 table[i][3] = 0x00;
4588             else if (pal->flags & WINEDDPCAPS_ALPHA)
4589                 table[i][3] = pal->palents[i].peFlags;
4590             else
4591                 table[i][3] = 0xff;
4592         }
4593     }
4594 }
4595
4596 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width, UINT height,
4597         UINT outpitch, enum wined3d_conversion_type conversion_type, struct wined3d_surface *surface)
4598 {
4599     const BYTE *source;
4600     BYTE *dest;
4601
4602     TRACE("src %p, dst %p, pitch %u, width %u, height %u, outpitch %u, conversion_type %#x, surface %p.\n",
4603             src, dst, pitch, width, height, outpitch, conversion_type, surface);
4604
4605     switch (conversion_type)
4606     {
4607         case WINED3D_CT_NONE:
4608         {
4609             memcpy(dst, src, pitch * height);
4610             break;
4611         }
4612
4613         case WINED3D_CT_PALETTED:
4614         case WINED3D_CT_PALETTED_CK:
4615         {
4616             BYTE table[256][4];
4617             unsigned int x, y;
4618
4619             d3dfmt_p8_init_palette(surface, table, (conversion_type == WINED3D_CT_PALETTED_CK));
4620
4621             for (y = 0; y < height; y++)
4622             {
4623                 source = src + pitch * y;
4624                 dest = dst + outpitch * y;
4625                 /* This is an 1 bpp format, using the width here is fine */
4626                 for (x = 0; x < width; x++) {
4627                     BYTE color = *source++;
4628                     *dest++ = table[color][0];
4629                     *dest++ = table[color][1];
4630                     *dest++ = table[color][2];
4631                     *dest++ = table[color][3];
4632                 }
4633             }
4634         }
4635         break;
4636
4637         case WINED3D_CT_CK_565:
4638         {
4639             /* Converting the 565 format in 5551 packed to emulate color-keying.
4640
4641               Note : in all these conversion, it would be best to average the averaging
4642                       pixels to get the color of the pixel that will be color-keyed to
4643                       prevent 'color bleeding'. This will be done later on if ever it is
4644                       too visible.
4645
4646               Note2: Nvidia documents say that their driver does not support alpha + color keying
4647                      on the same surface and disables color keying in such a case
4648             */
4649             unsigned int x, y;
4650             const WORD *Source;
4651             WORD *Dest;
4652
4653             TRACE("Color keyed 565\n");
4654
4655             for (y = 0; y < height; y++) {
4656                 Source = (const WORD *)(src + y * pitch);
4657                 Dest = (WORD *) (dst + y * outpitch);
4658                 for (x = 0; x < width; x++ ) {
4659                     WORD color = *Source++;
4660                     *Dest = ((color & 0xffc0) | ((color & 0x1f) << 1));
4661                     if (!color_in_range(&surface->src_blt_color_key, color))
4662                         *Dest |= 0x0001;
4663                     Dest++;
4664                 }
4665             }
4666         }
4667         break;
4668
4669         case WINED3D_CT_CK_5551:
4670         {
4671             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4672             unsigned int x, y;
4673             const WORD *Source;
4674             WORD *Dest;
4675             TRACE("Color keyed 5551\n");
4676             for (y = 0; y < height; y++) {
4677                 Source = (const WORD *)(src + y * pitch);
4678                 Dest = (WORD *) (dst + y * outpitch);
4679                 for (x = 0; x < width; x++ ) {
4680                     WORD color = *Source++;
4681                     *Dest = color;
4682                     if (!color_in_range(&surface->src_blt_color_key, color))
4683                         *Dest |= (1 << 15);
4684                     else
4685                         *Dest &= ~(1 << 15);
4686                     Dest++;
4687                 }
4688             }
4689         }
4690         break;
4691
4692         case WINED3D_CT_CK_RGB24:
4693         {
4694             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4695             unsigned int x, y;
4696             for (y = 0; y < height; y++)
4697             {
4698                 source = src + pitch * y;
4699                 dest = dst + outpitch * y;
4700                 for (x = 0; x < width; x++) {
4701                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4702                     DWORD dstcolor = color << 8;
4703                     if (!color_in_range(&surface->src_blt_color_key, color))
4704                         dstcolor |= 0xff;
4705                     *(DWORD*)dest = dstcolor;
4706                     source += 3;
4707                     dest += 4;
4708                 }
4709             }
4710         }
4711         break;
4712
4713         case WINED3D_CT_RGB32_888:
4714         {
4715             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4716             unsigned int x, y;
4717             for (y = 0; y < height; y++)
4718             {
4719                 source = src + pitch * y;
4720                 dest = dst + outpitch * y;
4721                 for (x = 0; x < width; x++) {
4722                     DWORD color = 0xffffff & *(const DWORD*)source;
4723                     DWORD dstcolor = color << 8;
4724                     if (!color_in_range(&surface->src_blt_color_key, color))
4725                         dstcolor |= 0xff;
4726                     *(DWORD*)dest = dstcolor;
4727                     source += 4;
4728                     dest += 4;
4729                 }
4730             }
4731         }
4732         break;
4733
4734         case WINED3D_CT_CK_ARGB32:
4735         {
4736             unsigned int x, y;
4737             for (y = 0; y < height; ++y)
4738             {
4739                 source = src + pitch * y;
4740                 dest = dst + outpitch * y;
4741                 for (x = 0; x < width; ++x)
4742                 {
4743                     DWORD color = *(const DWORD *)source;
4744                     if (color_in_range(&surface->src_blt_color_key, color))
4745                         color &= ~0xff000000;
4746                     *(DWORD*)dest = color;
4747                     source += 4;
4748                     dest += 4;
4749                 }
4750             }
4751         }
4752         break;
4753
4754         default:
4755             ERR("Unsupported conversion type %#x.\n", conversion_type);
4756     }
4757     return WINED3D_OK;
4758 }
4759
4760 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4761 {
4762     /* Flip the surface contents */
4763     /* Flip the DC */
4764     {
4765         HDC tmp;
4766         tmp = front->hDC;
4767         front->hDC = back->hDC;
4768         back->hDC = tmp;
4769     }
4770
4771     /* Flip the DIBsection */
4772     {
4773         HBITMAP tmp = front->dib.DIBsection;
4774         front->dib.DIBsection = back->dib.DIBsection;
4775         back->dib.DIBsection = tmp;
4776     }
4777
4778     /* Flip the surface data */
4779     {
4780         void* tmp;
4781
4782         tmp = front->dib.bitmap_data;
4783         front->dib.bitmap_data = back->dib.bitmap_data;
4784         back->dib.bitmap_data = tmp;
4785
4786         tmp = front->resource.allocatedMemory;
4787         front->resource.allocatedMemory = back->resource.allocatedMemory;
4788         back->resource.allocatedMemory = tmp;
4789
4790         tmp = front->resource.heapMemory;
4791         front->resource.heapMemory = back->resource.heapMemory;
4792         back->resource.heapMemory = tmp;
4793     }
4794
4795     /* Flip the PBO */
4796     {
4797         GLuint tmp_pbo = front->pbo;
4798         front->pbo = back->pbo;
4799         back->pbo = tmp_pbo;
4800     }
4801
4802     /* Flip the opengl texture */
4803     {
4804         GLuint tmp;
4805
4806         tmp = back->texture_name;
4807         back->texture_name = front->texture_name;
4808         front->texture_name = tmp;
4809
4810         tmp = back->texture_name_srgb;
4811         back->texture_name_srgb = front->texture_name_srgb;
4812         front->texture_name_srgb = tmp;
4813
4814         tmp = back->rb_multisample;
4815         back->rb_multisample = front->rb_multisample;
4816         front->rb_multisample = tmp;
4817
4818         tmp = back->rb_resolved;
4819         back->rb_resolved = front->rb_resolved;
4820         front->rb_resolved = tmp;
4821
4822         resource_unload(&back->resource);
4823         resource_unload(&front->resource);
4824     }
4825
4826     {
4827         DWORD tmp_flags = back->flags;
4828         back->flags = front->flags;
4829         front->flags = tmp_flags;
4830     }
4831 }
4832
4833 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4834  * pixel copy calls. */
4835 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4836         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4837 {
4838     struct wined3d_device *device = dst_surface->resource.device;
4839     const struct wined3d_gl_info *gl_info;
4840     float xrel, yrel;
4841     UINT row;
4842     struct wined3d_context *context;
4843     BOOL upsidedown = FALSE;
4844     RECT dst_rect = *dst_rect_in;
4845     GLenum dst_target;
4846
4847     if (dst_surface->container.type == WINED3D_CONTAINER_TEXTURE)
4848         dst_target = dst_surface->container.u.texture->target;
4849     else
4850         dst_target = dst_surface->texture_target;
4851
4852     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4853      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4854      */
4855     if(dst_rect.top > dst_rect.bottom) {
4856         UINT tmp = dst_rect.bottom;
4857         dst_rect.bottom = dst_rect.top;
4858         dst_rect.top = tmp;
4859         upsidedown = TRUE;
4860     }
4861
4862     context = context_acquire(device, src_surface);
4863     gl_info = context->gl_info;
4864     context_apply_blit_state(context, device);
4865     surface_internal_preload(dst_surface, SRGB_RGB);
4866
4867     /* Bind the target texture */
4868     context_bind_texture(context, dst_target, dst_surface->texture_name);
4869     if (surface_is_offscreen(src_surface))
4870     {
4871         TRACE("Reading from an offscreen target\n");
4872         upsidedown = !upsidedown;
4873         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
4874     }
4875     else
4876     {
4877         gl_info->gl_ops.gl.p_glReadBuffer(surface_get_gl_buffer(src_surface));
4878     }
4879     checkGLcall("glReadBuffer");
4880
4881     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4882     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4883
4884     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4885     {
4886         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4887
4888         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4889             ERR("Texture filtering not supported in direct blit.\n");
4890     }
4891     else if ((filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4892             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4893     {
4894         ERR("Texture filtering not supported in direct blit\n");
4895     }
4896
4897     if (upsidedown
4898             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4899             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4900     {
4901         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do. */
4902         gl_info->gl_ops.gl.p_glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4903                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4904                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4905                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4906     }
4907     else
4908     {
4909         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4910         /* I have to process this row by row to swap the image,
4911          * otherwise it would be upside down, so stretching in y direction
4912          * doesn't cost extra time
4913          *
4914          * However, stretching in x direction can be avoided if not necessary
4915          */
4916         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4917             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4918             {
4919                 /* Well, that stuff works, but it's very slow.
4920                  * find a better way instead
4921                  */
4922                 UINT col;
4923
4924                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4925                 {
4926                     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4927                             dst_rect.left + col /* x offset */, row /* y offset */,
4928                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4929                 }
4930             }
4931             else
4932             {
4933                 gl_info->gl_ops.gl.p_glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4934                         dst_rect.left /* x offset */, row /* y offset */,
4935                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4936             }
4937         }
4938     }
4939     checkGLcall("glCopyTexSubImage2D");
4940
4941     context_release(context);
4942
4943     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4944      * path is never entered
4945      */
4946     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4947 }
4948
4949 /* Uses the hardware to stretch and flip the image */
4950 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4951         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4952 {
4953     struct wined3d_device *device = dst_surface->resource.device;
4954     struct wined3d_swapchain *src_swapchain = NULL;
4955     GLuint src, backup = 0;
4956     float left, right, top, bottom; /* Texture coordinates */
4957     UINT fbwidth = src_surface->resource.width;
4958     UINT fbheight = src_surface->resource.height;
4959     const struct wined3d_gl_info *gl_info;
4960     struct wined3d_context *context;
4961     GLenum drawBuffer = GL_BACK;
4962     GLenum texture_target;
4963     BOOL noBackBufferBackup;
4964     BOOL src_offscreen;
4965     BOOL upsidedown = FALSE;
4966     RECT dst_rect = *dst_rect_in;
4967
4968     TRACE("Using hwstretch blit\n");
4969     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4970     context = context_acquire(device, src_surface);
4971     gl_info = context->gl_info;
4972     context_apply_blit_state(context, device);
4973     surface_internal_preload(dst_surface, SRGB_RGB);
4974
4975     src_offscreen = surface_is_offscreen(src_surface);
4976     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4977     if (!noBackBufferBackup && !src_surface->texture_name)
4978     {
4979         /* Get it a description */
4980         surface_internal_preload(src_surface, SRGB_RGB);
4981     }
4982
4983     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4984      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4985      */
4986     if (context->aux_buffers >= 2)
4987     {
4988         /* Got more than one aux buffer? Use the 2nd aux buffer */
4989         drawBuffer = GL_AUX1;
4990     }
4991     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4992     {
4993         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4994         drawBuffer = GL_AUX0;
4995     }
4996
4997     if (noBackBufferBackup)
4998     {
4999         gl_info->gl_ops.gl.p_glGenTextures(1, &backup);
5000         checkGLcall("glGenTextures");
5001         context_bind_texture(context, GL_TEXTURE_2D, backup);
5002         texture_target = GL_TEXTURE_2D;
5003     }
5004     else
5005     {
5006         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5007          * we are reading from the back buffer, the backup can be used as source texture
5008          */
5009         texture_target = src_surface->texture_target;
5010         context_bind_texture(context, texture_target, src_surface->texture_name);
5011         gl_info->gl_ops.gl.p_glEnable(texture_target);
5012         checkGLcall("glEnable(texture_target)");
5013
5014         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5015         src_surface->flags &= ~SFLAG_INTEXTURE;
5016     }
5017
5018     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5019      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5020      */
5021     if(dst_rect.top > dst_rect.bottom) {
5022         UINT tmp = dst_rect.bottom;
5023         dst_rect.bottom = dst_rect.top;
5024         dst_rect.top = tmp;
5025         upsidedown = TRUE;
5026     }
5027
5028     if (src_offscreen)
5029     {
5030         TRACE("Reading from an offscreen target\n");
5031         upsidedown = !upsidedown;
5032         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
5033     }
5034     else
5035     {
5036         gl_info->gl_ops.gl.p_glReadBuffer(surface_get_gl_buffer(src_surface));
5037     }
5038
5039     /* TODO: Only back up the part that will be overwritten */
5040     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(texture_target, 0, 0, 0, 0, 0, fbwidth, fbheight);
5041
5042     checkGLcall("glCopyTexSubImage2D");
5043
5044     /* No issue with overriding these - the sampler is dirty due to blit usage */
5045     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5046             wined3d_gl_mag_filter(magLookup, filter));
5047     checkGLcall("glTexParameteri");
5048     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5049             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
5050     checkGLcall("glTexParameteri");
5051
5052     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5053         src_swapchain = src_surface->container.u.swapchain;
5054     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5055     {
5056         src = backup ? backup : src_surface->texture_name;
5057     }
5058     else
5059     {
5060         gl_info->gl_ops.gl.p_glReadBuffer(GL_FRONT);
5061         checkGLcall("glReadBuffer(GL_FRONT)");
5062
5063         gl_info->gl_ops.gl.p_glGenTextures(1, &src);
5064         checkGLcall("glGenTextures(1, &src)");
5065         context_bind_texture(context, GL_TEXTURE_2D, src);
5066
5067         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5068          * out for power of 2 sizes
5069          */
5070         gl_info->gl_ops.gl.p_glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5071                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5072         checkGLcall("glTexImage2D");
5073         gl_info->gl_ops.gl.p_glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, fbwidth, fbheight);
5074
5075         gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5076         checkGLcall("glTexParameteri");
5077         gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5078         checkGLcall("glTexParameteri");
5079
5080         gl_info->gl_ops.gl.p_glReadBuffer(GL_BACK);
5081         checkGLcall("glReadBuffer(GL_BACK)");
5082
5083         if (texture_target != GL_TEXTURE_2D)
5084         {
5085             gl_info->gl_ops.gl.p_glDisable(texture_target);
5086             gl_info->gl_ops.gl.p_glEnable(GL_TEXTURE_2D);
5087             texture_target = GL_TEXTURE_2D;
5088         }
5089     }
5090     checkGLcall("glEnd and previous");
5091
5092     left = src_rect->left;
5093     right = src_rect->right;
5094
5095     if (!upsidedown)
5096     {
5097         top = src_surface->resource.height - src_rect->top;
5098         bottom = src_surface->resource.height - src_rect->bottom;
5099     }
5100     else
5101     {
5102         top = src_surface->resource.height - src_rect->bottom;
5103         bottom = src_surface->resource.height - src_rect->top;
5104     }
5105
5106     if (src_surface->flags & SFLAG_NORMCOORD)
5107     {
5108         left /= src_surface->pow2Width;
5109         right /= src_surface->pow2Width;
5110         top /= src_surface->pow2Height;
5111         bottom /= src_surface->pow2Height;
5112     }
5113
5114     /* draw the source texture stretched and upside down. The correct surface is bound already */
5115     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5116     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5117
5118     context_set_draw_buffer(context, drawBuffer);
5119     gl_info->gl_ops.gl.p_glReadBuffer(drawBuffer);
5120
5121     gl_info->gl_ops.gl.p_glBegin(GL_QUADS);
5122         /* bottom left */
5123         gl_info->gl_ops.gl.p_glTexCoord2f(left, bottom);
5124         gl_info->gl_ops.gl.p_glVertex2i(0, 0);
5125
5126         /* top left */
5127         gl_info->gl_ops.gl.p_glTexCoord2f(left, top);
5128         gl_info->gl_ops.gl.p_glVertex2i(0, dst_rect.bottom - dst_rect.top);
5129
5130         /* top right */
5131         gl_info->gl_ops.gl.p_glTexCoord2f(right, top);
5132         gl_info->gl_ops.gl.p_glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5133
5134         /* bottom right */
5135         gl_info->gl_ops.gl.p_glTexCoord2f(right, bottom);
5136         gl_info->gl_ops.gl.p_glVertex2i(dst_rect.right - dst_rect.left, 0);
5137     gl_info->gl_ops.gl.p_glEnd();
5138     checkGLcall("glEnd and previous");
5139
5140     if (texture_target != dst_surface->texture_target)
5141     {
5142         gl_info->gl_ops.gl.p_glDisable(texture_target);
5143         gl_info->gl_ops.gl.p_glEnable(dst_surface->texture_target);
5144         texture_target = dst_surface->texture_target;
5145     }
5146
5147     /* Now read the stretched and upside down image into the destination texture */
5148     context_bind_texture(context, texture_target, dst_surface->texture_name);
5149     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(texture_target,
5150                         0,
5151                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5152                         0, 0, /* We blitted the image to the origin */
5153                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5154     checkGLcall("glCopyTexSubImage2D");
5155
5156     if (drawBuffer == GL_BACK)
5157     {
5158         /* Write the back buffer backup back. */
5159         if (backup)
5160         {
5161             if (texture_target != GL_TEXTURE_2D)
5162             {
5163                 gl_info->gl_ops.gl.p_glDisable(texture_target);
5164                 gl_info->gl_ops.gl.p_glEnable(GL_TEXTURE_2D);
5165                 texture_target = GL_TEXTURE_2D;
5166             }
5167             context_bind_texture(context, GL_TEXTURE_2D, backup);
5168         }
5169         else
5170         {
5171             if (texture_target != src_surface->texture_target)
5172             {
5173                 gl_info->gl_ops.gl.p_glDisable(texture_target);
5174                 gl_info->gl_ops.gl.p_glEnable(src_surface->texture_target);
5175                 texture_target = src_surface->texture_target;
5176             }
5177             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5178         }
5179
5180         gl_info->gl_ops.gl.p_glBegin(GL_QUADS);
5181             /* top left */
5182             gl_info->gl_ops.gl.p_glTexCoord2f(0.0f, 0.0f);
5183             gl_info->gl_ops.gl.p_glVertex2i(0, fbheight);
5184
5185             /* bottom left */
5186             gl_info->gl_ops.gl.p_glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5187             gl_info->gl_ops.gl.p_glVertex2i(0, 0);
5188
5189             /* bottom right */
5190             gl_info->gl_ops.gl.p_glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5191                     (float)fbheight / (float)src_surface->pow2Height);
5192             gl_info->gl_ops.gl.p_glVertex2i(fbwidth, 0);
5193
5194             /* top right */
5195             gl_info->gl_ops.gl.p_glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5196             gl_info->gl_ops.gl.p_glVertex2i(fbwidth, fbheight);
5197         gl_info->gl_ops.gl.p_glEnd();
5198     }
5199     gl_info->gl_ops.gl.p_glDisable(texture_target);
5200     checkGLcall("glDisable(texture_target)");
5201
5202     /* Cleanup */
5203     if (src != src_surface->texture_name && src != backup)
5204     {
5205         gl_info->gl_ops.gl.p_glDeleteTextures(1, &src);
5206         checkGLcall("glDeleteTextures(1, &src)");
5207     }
5208     if (backup)
5209     {
5210         gl_info->gl_ops.gl.p_glDeleteTextures(1, &backup);
5211         checkGLcall("glDeleteTextures(1, &backup)");
5212     }
5213
5214     if (wined3d_settings.strict_draw_ordering)
5215         gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5216
5217     context_release(context);
5218
5219     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5220      * path is never entered
5221      */
5222     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5223 }
5224
5225 /* Front buffer coordinates are always full screen coordinates, but our GL
5226  * drawable is limited to the window's client area. The sysmem and texture
5227  * copies do have the full screen size. Note that GL has a bottom-left
5228  * origin, while D3D has a top-left origin. */
5229 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5230 {
5231     UINT drawable_height;
5232
5233     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5234             && surface == surface->container.u.swapchain->front_buffer)
5235     {
5236         POINT offset = {0, 0};
5237         RECT windowsize;
5238
5239         ScreenToClient(window, &offset);
5240         OffsetRect(rect, offset.x, offset.y);
5241
5242         GetClientRect(window, &windowsize);
5243         drawable_height = windowsize.bottom - windowsize.top;
5244     }
5245     else
5246     {
5247         drawable_height = surface->resource.height;
5248     }
5249
5250     rect->top = drawable_height - rect->top;
5251     rect->bottom = drawable_height - rect->bottom;
5252 }
5253
5254 static void surface_blt_to_drawable(const struct wined3d_device *device,
5255         enum wined3d_texture_filter_type filter, BOOL color_key,
5256         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5257         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5258 {
5259     const struct wined3d_gl_info *gl_info;
5260     struct wined3d_context *context;
5261     RECT src_rect, dst_rect;
5262
5263     src_rect = *src_rect_in;
5264     dst_rect = *dst_rect_in;
5265
5266     /* Make sure the surface is up-to-date. This should probably use
5267      * surface_load_location() and worry about the destination surface too,
5268      * unless we're overwriting it completely. */
5269     surface_internal_preload(src_surface, SRGB_RGB);
5270
5271     /* Activate the destination context, set it up for blitting */
5272     context = context_acquire(device, dst_surface);
5273     gl_info = context->gl_info;
5274     context_apply_blit_state(context, device);
5275
5276     if (!surface_is_offscreen(dst_surface))
5277         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5278
5279     device->blitter->set_shader(device->blit_priv, context, src_surface);
5280
5281     if (color_key)
5282     {
5283         gl_info->gl_ops.gl.p_glEnable(GL_ALPHA_TEST);
5284         checkGLcall("glEnable(GL_ALPHA_TEST)");
5285
5286         /* When the primary render target uses P8, the alpha component
5287          * contains the palette index. Which means that the colorkey is one of
5288          * the palette entries. In other cases pixels that should be masked
5289          * away have alpha set to 0. */
5290         if (primary_render_target_is_p8(device))
5291             gl_info->gl_ops.gl.p_glAlphaFunc(GL_NOTEQUAL,
5292                     (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5293         else
5294             gl_info->gl_ops.gl.p_glAlphaFunc(GL_NOTEQUAL, 0.0f);
5295         checkGLcall("glAlphaFunc");
5296     }
5297     else
5298     {
5299         gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5300         checkGLcall("glDisable(GL_ALPHA_TEST)");
5301     }
5302
5303     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5304
5305     if (color_key)
5306     {
5307         gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5308         checkGLcall("glDisable(GL_ALPHA_TEST)");
5309     }
5310
5311     /* Leave the opengl state valid for blitting */
5312     device->blitter->unset_shader(context->gl_info);
5313
5314     if (wined3d_settings.strict_draw_ordering
5315             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5316             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5317         gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5318
5319     context_release(context);
5320 }
5321
5322 /* Do not call while under the GL lock. */
5323 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5324 {
5325     struct wined3d_device *device = s->resource.device;
5326     const struct blit_shader *blitter;
5327
5328     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5329             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5330     if (!blitter)
5331     {
5332         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5333         return WINED3DERR_INVALIDCALL;
5334     }
5335
5336     return blitter->color_fill(device, s, rect, color);
5337 }
5338
5339 /* Do not call while under the GL lock. */
5340 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5341         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5342         enum wined3d_texture_filter_type filter)
5343 {
5344     struct wined3d_device *device = dst_surface->resource.device;
5345     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5346     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5347
5348     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5349             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5350             flags, DDBltFx, debug_d3dtexturefiltertype(filter));
5351
5352     /* Get the swapchain. One of the surfaces has to be a primary surface */
5353     if (dst_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5354     {
5355         WARN("Destination is in sysmem, rejecting gl blt\n");
5356         return WINED3DERR_INVALIDCALL;
5357     }
5358
5359     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5360         dstSwapchain = dst_surface->container.u.swapchain;
5361
5362     if (src_surface)
5363     {
5364         if (src_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5365         {
5366             WARN("Src is in sysmem, rejecting gl blt\n");
5367             return WINED3DERR_INVALIDCALL;
5368         }
5369
5370         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5371             srcSwapchain = src_surface->container.u.swapchain;
5372     }
5373
5374     /* Early sort out of cases where no render target is used */
5375     if (!dstSwapchain && !srcSwapchain
5376             && src_surface != device->fb.render_targets[0]
5377             && dst_surface != device->fb.render_targets[0])
5378     {
5379         TRACE("No surface is render target, not using hardware blit.\n");
5380         return WINED3DERR_INVALIDCALL;
5381     }
5382
5383     /* No destination color keying supported */
5384     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5385     {
5386         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5387         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5388         return WINED3DERR_INVALIDCALL;
5389     }
5390
5391     if (dstSwapchain && dstSwapchain == srcSwapchain)
5392     {
5393         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5394         return WINED3DERR_INVALIDCALL;
5395     }
5396
5397     if (dstSwapchain && srcSwapchain)
5398     {
5399         FIXME("Implement hardware blit between two different swapchains\n");
5400         return WINED3DERR_INVALIDCALL;
5401     }
5402
5403     if (dstSwapchain)
5404     {
5405         /* Handled with regular texture -> swapchain blit */
5406         if (src_surface == device->fb.render_targets[0])
5407             TRACE("Blit from active render target to a swapchain\n");
5408     }
5409     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5410     {
5411         FIXME("Implement blit from a swapchain to the active render target\n");
5412         return WINED3DERR_INVALIDCALL;
5413     }
5414
5415     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5416     {
5417         /* Blit from render target to texture */
5418         BOOL stretchx;
5419
5420         /* P8 read back is not implemented */
5421         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5422                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5423         {
5424             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5425             return WINED3DERR_INVALIDCALL;
5426         }
5427
5428         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5429         {
5430             TRACE("Color keying not supported by frame buffer to texture blit\n");
5431             return WINED3DERR_INVALIDCALL;
5432             /* Destination color key is checked above */
5433         }
5434
5435         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5436             stretchx = TRUE;
5437         else
5438             stretchx = FALSE;
5439
5440         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5441          * flip the image nor scale it.
5442          *
5443          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5444          * -> If the app wants a image width an unscaled width, copy it line per line
5445          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5446          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5447          *    back buffer. This is slower than reading line per line, thus not used for flipping
5448          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5449          *    pixel by pixel. */
5450         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5451                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5452         {
5453             TRACE("No stretching in x direction, using direct framebuffer -> texture copy.\n");
5454             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, filter);
5455         }
5456         else
5457         {
5458             TRACE("Using hardware stretching to flip / stretch the texture.\n");
5459             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, filter);
5460         }
5461
5462         if (!dst_surface->resource.map_count && !(dst_surface->flags & SFLAG_DONOTFREE))
5463         {
5464             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5465             dst_surface->resource.allocatedMemory = NULL;
5466             dst_surface->resource.heapMemory = NULL;
5467         }
5468         else
5469         {
5470             dst_surface->flags &= ~SFLAG_INSYSMEM;
5471         }
5472
5473         return WINED3D_OK;
5474     }
5475     else if (src_surface)
5476     {
5477         /* Blit from offscreen surface to render target */
5478         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5479         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5480
5481         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5482
5483         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5484                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5485                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5486         {
5487             FIXME("Unsupported blit operation falling back to software\n");
5488             return WINED3DERR_INVALIDCALL;
5489         }
5490
5491         /* Color keying: Check if we have to do a color keyed blt,
5492          * and if not check if a color key is activated.
5493          *
5494          * Just modify the color keying parameters in the surface and restore them afterwards
5495          * The surface keeps track of the color key last used to load the opengl surface.
5496          * PreLoad will catch the change to the flags and color key and reload if necessary.
5497          */
5498         if (flags & WINEDDBLT_KEYSRC)
5499         {
5500             /* Use color key from surface */
5501         }
5502         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5503         {
5504             /* Use color key from DDBltFx */
5505             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5506             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5507         }
5508         else
5509         {
5510             /* Do not use color key */
5511             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5512         }
5513
5514         surface_blt_to_drawable(device, filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5515                 src_surface, src_rect, dst_surface, dst_rect);
5516
5517         /* Restore the color key parameters */
5518         src_surface->CKeyFlags = oldCKeyFlags;
5519         src_surface->src_blt_color_key = old_blt_key;
5520
5521         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5522
5523         return WINED3D_OK;
5524     }
5525
5526     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5527     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5528     return WINED3DERR_INVALIDCALL;
5529 }
5530
5531 /* Context activation is done by the caller. */
5532 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5533         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5534 {
5535     struct wined3d_device *device = surface->resource.device;
5536     const struct wined3d_gl_info *gl_info = context->gl_info;
5537     GLint compare_mode = GL_NONE;
5538     struct blt_info info;
5539     GLint old_binding = 0;
5540     RECT rect;
5541
5542     gl_info->gl_ops.gl.p_glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5543
5544     gl_info->gl_ops.gl.p_glDisable(GL_CULL_FACE);
5545     gl_info->gl_ops.gl.p_glDisable(GL_BLEND);
5546     gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5547     gl_info->gl_ops.gl.p_glDisable(GL_SCISSOR_TEST);
5548     gl_info->gl_ops.gl.p_glDisable(GL_STENCIL_TEST);
5549     gl_info->gl_ops.gl.p_glEnable(GL_DEPTH_TEST);
5550     gl_info->gl_ops.gl.p_glDepthFunc(GL_ALWAYS);
5551     gl_info->gl_ops.gl.p_glDepthMask(GL_TRUE);
5552     gl_info->gl_ops.gl.p_glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5553     gl_info->gl_ops.gl.p_glViewport(x, y, w, h);
5554     gl_info->gl_ops.gl.p_glDepthRange(0.0, 1.0);
5555
5556     SetRect(&rect, 0, h, w, 0);
5557     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5558     context_active_texture(context, context->gl_info, 0);
5559     gl_info->gl_ops.gl.p_glGetIntegerv(info.binding, &old_binding);
5560     gl_info->gl_ops.gl.p_glBindTexture(info.bind_target, texture);
5561     if (gl_info->supported[ARB_SHADOW])
5562     {
5563         gl_info->gl_ops.gl.p_glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5564         if (compare_mode != GL_NONE)
5565             gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5566     }
5567
5568     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5569             gl_info, info.tex_type, &surface->ds_current_size);
5570
5571     gl_info->gl_ops.gl.p_glBegin(GL_TRIANGLE_STRIP);
5572     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[0]);
5573     gl_info->gl_ops.gl.p_glVertex2f(-1.0f, -1.0f);
5574     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[1]);
5575     gl_info->gl_ops.gl.p_glVertex2f(1.0f, -1.0f);
5576     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[2]);
5577     gl_info->gl_ops.gl.p_glVertex2f(-1.0f, 1.0f);
5578     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[3]);
5579     gl_info->gl_ops.gl.p_glVertex2f(1.0f, 1.0f);
5580     gl_info->gl_ops.gl.p_glEnd();
5581
5582     if (compare_mode != GL_NONE)
5583         gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5584     gl_info->gl_ops.gl.p_glBindTexture(info.bind_target, old_binding);
5585
5586     gl_info->gl_ops.gl.p_glPopAttrib();
5587
5588     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5589 }
5590
5591 void surface_modify_ds_location(struct wined3d_surface *surface,
5592         DWORD location, UINT w, UINT h)
5593 {
5594     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5595
5596     if (location & ~(SFLAG_LOCATIONS | SFLAG_DISCARDED))
5597         FIXME("Invalid location (%#x) specified.\n", location);
5598
5599     if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5600             || (!(surface->flags & SFLAG_INTEXTURE) && (location & SFLAG_INTEXTURE)))
5601     {
5602         if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5603         {
5604             TRACE("Passing to container.\n");
5605             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5606         }
5607     }
5608
5609     surface->ds_current_size.cx = w;
5610     surface->ds_current_size.cy = h;
5611     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_DISCARDED);
5612     surface->flags |= location;
5613 }
5614
5615 /* Context activation is done by the caller. */
5616 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5617 {
5618     const struct wined3d_gl_info *gl_info = context->gl_info;
5619     struct wined3d_device *device = surface->resource.device;
5620     GLsizei w, h;
5621
5622     TRACE("surface %p, new location %#x.\n", surface, location);
5623
5624     /* TODO: Make this work for modes other than FBO */
5625     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5626
5627     if (!(surface->flags & location))
5628     {
5629         w = surface->ds_current_size.cx;
5630         h = surface->ds_current_size.cy;
5631         surface->ds_current_size.cx = 0;
5632         surface->ds_current_size.cy = 0;
5633     }
5634     else
5635     {
5636         w = surface->resource.width;
5637         h = surface->resource.height;
5638     }
5639
5640     if (surface->ds_current_size.cx == surface->resource.width
5641             && surface->ds_current_size.cy == surface->resource.height)
5642     {
5643         TRACE("Location (%#x) is already up to date.\n", location);
5644         return;
5645     }
5646
5647     if (surface->current_renderbuffer)
5648     {
5649         FIXME("Not supported with fixed up depth stencil.\n");
5650         return;
5651     }
5652
5653     if (surface->flags & SFLAG_DISCARDED)
5654     {
5655         TRACE("Surface was discarded, no need copy data.\n");
5656         switch (location)
5657         {
5658             case SFLAG_INTEXTURE:
5659                 surface_prepare_texture(surface, context, FALSE);
5660                 break;
5661             case SFLAG_INRB_MULTISAMPLE:
5662                 surface_prepare_rb(surface, gl_info, TRUE);
5663                 break;
5664             case SFLAG_INDRAWABLE:
5665                 /* Nothing to do */
5666                 break;
5667             default:
5668                 FIXME("Unhandled location %#x\n", location);
5669         }
5670         surface->flags &= ~SFLAG_DISCARDED;
5671         surface->flags |= location;
5672         surface->ds_current_size.cx = surface->resource.width;
5673         surface->ds_current_size.cy = surface->resource.height;
5674         return;
5675     }
5676
5677     if (!(surface->flags & SFLAG_LOCATIONS))
5678     {
5679         FIXME("No up to date depth stencil location.\n");
5680         surface->flags |= location;
5681         surface->ds_current_size.cx = surface->resource.width;
5682         surface->ds_current_size.cy = surface->resource.height;
5683         return;
5684     }
5685
5686     if (location == SFLAG_INTEXTURE)
5687     {
5688         GLint old_binding = 0;
5689         GLenum bind_target;
5690
5691         /* The render target is allowed to be smaller than the depth/stencil
5692          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5693          * than the offscreen surface. Don't overwrite the offscreen surface
5694          * with undefined data. */
5695         w = min(w, context->swapchain->desc.backbuffer_width);
5696         h = min(h, context->swapchain->desc.backbuffer_height);
5697
5698         TRACE("Copying onscreen depth buffer to depth texture.\n");
5699
5700         if (!device->depth_blt_texture)
5701             gl_info->gl_ops.gl.p_glGenTextures(1, &device->depth_blt_texture);
5702
5703         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5704          * directly on the FBO texture. That's because we need to flip. */
5705         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5706                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5707         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5708         {
5709             gl_info->gl_ops.gl.p_glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5710             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5711         }
5712         else
5713         {
5714             gl_info->gl_ops.gl.p_glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5715             bind_target = GL_TEXTURE_2D;
5716         }
5717         gl_info->gl_ops.gl.p_glBindTexture(bind_target, device->depth_blt_texture);
5718         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5719          * internal format, because the internal format might include stencil
5720          * data. In principle we should copy stencil data as well, but unless
5721          * the driver supports stencil export it's hard to do, and doesn't
5722          * seem to be needed in practice. If the hardware doesn't support
5723          * writing stencil data, the glCopyTexImage2D() call might trigger
5724          * software fallbacks. */
5725         gl_info->gl_ops.gl.p_glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5726         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5727         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5728         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5729         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5730         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5731         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5732         gl_info->gl_ops.gl.p_glBindTexture(bind_target, old_binding);
5733
5734         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5735                 NULL, surface, SFLAG_INTEXTURE);
5736         context_set_draw_buffer(context, GL_NONE);
5737         gl_info->gl_ops.gl.p_glReadBuffer(GL_NONE);
5738
5739         /* Do the actual blit */
5740         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5741         checkGLcall("depth_blt");
5742
5743         context_invalidate_state(context, STATE_FRAMEBUFFER);
5744
5745         if (wined3d_settings.strict_draw_ordering)
5746             gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5747     }
5748     else if (location == SFLAG_INDRAWABLE)
5749     {
5750         TRACE("Copying depth texture to onscreen depth buffer.\n");
5751
5752         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5753                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5754         surface_depth_blt(surface, context, surface->texture_name,
5755                 0, surface->pow2Height - h, w, h, surface->texture_target);
5756         checkGLcall("depth_blt");
5757
5758         context_invalidate_state(context, STATE_FRAMEBUFFER);
5759
5760         if (wined3d_settings.strict_draw_ordering)
5761             gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5762     }
5763     else
5764     {
5765         ERR("Invalid location (%#x) specified.\n", location);
5766     }
5767
5768     surface->flags |= location;
5769     surface->ds_current_size.cx = surface->resource.width;
5770     surface->ds_current_size.cy = surface->resource.height;
5771 }
5772
5773 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5774 {
5775     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5776     struct wined3d_surface *overlay;
5777
5778     TRACE("surface %p, location %s, persistent %#x.\n",
5779             surface, debug_surflocation(location), persistent);
5780
5781     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5782             && !(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5783             && (location & SFLAG_INDRAWABLE))
5784         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5785
5786     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5787             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5788         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5789
5790     if (persistent)
5791     {
5792         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5793                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5794         {
5795             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5796             {
5797                 TRACE("Passing to container.\n");
5798                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5799             }
5800         }
5801         surface->flags &= ~SFLAG_LOCATIONS;
5802         surface->flags |= location;
5803
5804         /* Redraw emulated overlays, if any */
5805         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5806         {
5807             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5808             {
5809                 surface_draw_overlay(overlay);
5810             }
5811         }
5812     }
5813     else
5814     {
5815         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5816         {
5817             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5818             {
5819                 TRACE("Passing to container\n");
5820                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5821             }
5822         }
5823         surface->flags &= ~location;
5824     }
5825
5826     if (!(surface->flags & SFLAG_LOCATIONS))
5827     {
5828         ERR("Surface %p does not have any up to date location.\n", surface);
5829     }
5830 }
5831
5832 static DWORD resource_access_from_location(DWORD location)
5833 {
5834     switch (location)
5835     {
5836         case SFLAG_INSYSMEM:
5837             return WINED3D_RESOURCE_ACCESS_CPU;
5838
5839         case SFLAG_INDRAWABLE:
5840         case SFLAG_INSRGBTEX:
5841         case SFLAG_INTEXTURE:
5842         case SFLAG_INRB_MULTISAMPLE:
5843         case SFLAG_INRB_RESOLVED:
5844             return WINED3D_RESOURCE_ACCESS_GPU;
5845
5846         default:
5847             FIXME("Unhandled location %#x.\n", location);
5848             return 0;
5849     }
5850 }
5851
5852 static void surface_load_sysmem(struct wined3d_surface *surface,
5853         const struct wined3d_gl_info *gl_info, const RECT *rect)
5854 {
5855     surface_prepare_system_memory(surface);
5856
5857     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5858         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5859
5860     /* Download the surface to system memory. */
5861     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5862     {
5863         struct wined3d_device *device = surface->resource.device;
5864         struct wined3d_context *context;
5865
5866         /* TODO: Use already acquired context when possible. */
5867         context = context_acquire(device, NULL);
5868
5869         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5870         surface_download_data(surface, gl_info);
5871
5872         context_release(context);
5873
5874         return;
5875     }
5876
5877     if (surface->flags & SFLAG_INDRAWABLE)
5878     {
5879         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5880                 wined3d_surface_get_pitch(surface));
5881         return;
5882     }
5883
5884     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5885             surface, surface->flags & SFLAG_LOCATIONS);
5886 }
5887
5888 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5889         const struct wined3d_gl_info *gl_info, const RECT *rect)
5890 {
5891     struct wined3d_device *device = surface->resource.device;
5892     enum wined3d_conversion_type convert;
5893     struct wined3d_format format;
5894     UINT byte_count;
5895     BYTE *mem;
5896
5897     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5898     {
5899         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5900         return WINED3DERR_INVALIDCALL;
5901     }
5902
5903     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5904         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5905
5906     if (surface->flags & SFLAG_INTEXTURE)
5907     {
5908         RECT r;
5909
5910         surface_get_rect(surface, rect, &r);
5911         surface_blt_to_drawable(device, WINED3D_TEXF_POINT, FALSE, surface, &r, surface, &r);
5912
5913         return WINED3D_OK;
5914     }
5915
5916     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5917     {
5918         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5919          * path through sysmem. */
5920         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5921     }
5922
5923     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5924
5925     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5926      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5927      * called. */
5928     if ((convert != WINED3D_CT_NONE) && (surface->flags & SFLAG_PBO))
5929     {
5930         struct wined3d_context *context;
5931
5932         TRACE("Removing the pbo attached to surface %p.\n", surface);
5933
5934         /* TODO: Use already acquired context when possible. */
5935         context = context_acquire(device, NULL);
5936
5937         surface_remove_pbo(surface, gl_info);
5938
5939         context_release(context);
5940     }
5941
5942     if ((convert != WINED3D_CT_NONE) && surface->resource.allocatedMemory)
5943     {
5944         UINT height = surface->resource.height;
5945         UINT width = surface->resource.width;
5946         UINT src_pitch, dst_pitch;
5947
5948         byte_count = format.conv_byte_count;
5949         src_pitch = wined3d_surface_get_pitch(surface);
5950
5951         /* Stick to the alignment for the converted surface too, makes it
5952          * easier to load the surface. */
5953         dst_pitch = width * byte_count;
5954         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5955
5956         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5957         {
5958             ERR("Out of memory (%u).\n", dst_pitch * height);
5959             return E_OUTOFMEMORY;
5960         }
5961
5962         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5963                 src_pitch, width, height, dst_pitch, convert, surface);
5964
5965         surface->flags |= SFLAG_CONVERTED;
5966     }
5967     else
5968     {
5969         surface->flags &= ~SFLAG_CONVERTED;
5970         mem = surface->resource.allocatedMemory;
5971         byte_count = format.byte_count;
5972     }
5973
5974     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5975
5976     /* Don't delete PBO memory. */
5977     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5978         HeapFree(GetProcessHeap(), 0, mem);
5979
5980     return WINED3D_OK;
5981 }
5982
5983 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5984         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5985 {
5986     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5987     struct wined3d_device *device = surface->resource.device;
5988     enum wined3d_conversion_type convert;
5989     struct wined3d_context *context;
5990     UINT width, src_pitch, dst_pitch;
5991     struct wined3d_bo_address data;
5992     struct wined3d_format format;
5993     POINT dst_point = {0, 0};
5994     BYTE *mem;
5995
5996     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5997             && surface_is_offscreen(surface)
5998             && (surface->flags & SFLAG_INDRAWABLE))
5999     {
6000         surface_load_fb_texture(surface, srgb);
6001
6002         return WINED3D_OK;
6003     }
6004
6005     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6006             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
6007             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6008                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6009                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6010     {
6011         if (srgb)
6012             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INTEXTURE,
6013                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6014         else
6015             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INSRGBTEX,
6016                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6017
6018         return WINED3D_OK;
6019     }
6020
6021     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6022             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6023             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6024                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6025                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6026     {
6027         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6028         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
6029         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6030
6031         surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, src_location,
6032                 &rect, surface, dst_location, &rect);
6033
6034         return WINED3D_OK;
6035     }
6036
6037     /* Upload from system memory */
6038
6039     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6040             TRUE /* We will use textures */, &format, &convert);
6041
6042     if (srgb)
6043     {
6044         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6045         {
6046             /* Performance warning... */
6047             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6048             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6049         }
6050     }
6051     else
6052     {
6053         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6054         {
6055             /* Performance warning... */
6056             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6057             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6058         }
6059     }
6060
6061     if (!(surface->flags & SFLAG_INSYSMEM))
6062     {
6063         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6064         /* Lets hope we get it from somewhere... */
6065         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6066     }
6067
6068     /* TODO: Use already acquired context when possible. */
6069     context = context_acquire(device, NULL);
6070
6071     surface_prepare_texture(surface, context, srgb);
6072     surface_bind_and_dirtify(surface, context, srgb);
6073
6074     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6075     {
6076         surface->flags |= SFLAG_GLCKEY;
6077         surface->gl_color_key = surface->src_blt_color_key;
6078     }
6079     else surface->flags &= ~SFLAG_GLCKEY;
6080
6081     width = surface->resource.width;
6082     src_pitch = wined3d_surface_get_pitch(surface);
6083
6084     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6085      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6086      * called. */
6087     if ((convert != WINED3D_CT_NONE || format.convert) && (surface->flags & SFLAG_PBO))
6088     {
6089         TRACE("Removing the pbo attached to surface %p.\n", surface);
6090         surface_remove_pbo(surface, gl_info);
6091     }
6092
6093     if (format.convert)
6094     {
6095         /* This code is entered for texture formats which need a fixup. */
6096         UINT height = surface->resource.height;
6097
6098         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6099         dst_pitch = width * format.conv_byte_count;
6100         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6101
6102         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6103         {
6104             ERR("Out of memory (%u).\n", dst_pitch * height);
6105             context_release(context);
6106             return E_OUTOFMEMORY;
6107         }
6108         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6109         format.byte_count = format.conv_byte_count;
6110         src_pitch = dst_pitch;
6111     }
6112     else if (convert != WINED3D_CT_NONE && surface->resource.allocatedMemory)
6113     {
6114         /* This code is only entered for color keying fixups */
6115         UINT height = surface->resource.height;
6116
6117         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6118         dst_pitch = width * format.conv_byte_count;
6119         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6120
6121         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6122         {
6123             ERR("Out of memory (%u).\n", dst_pitch * height);
6124             context_release(context);
6125             return E_OUTOFMEMORY;
6126         }
6127         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6128                 width, height, dst_pitch, convert, surface);
6129         format.byte_count = format.conv_byte_count;
6130         src_pitch = dst_pitch;
6131     }
6132     else
6133     {
6134         mem = surface->resource.allocatedMemory;
6135     }
6136
6137     data.buffer_object = surface->pbo;
6138     data.addr = mem;
6139     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6140
6141     context_release(context);
6142
6143     /* Don't delete PBO memory. */
6144     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6145         HeapFree(GetProcessHeap(), 0, mem);
6146
6147     return WINED3D_OK;
6148 }
6149
6150 static void surface_multisample_resolve(struct wined3d_surface *surface)
6151 {
6152     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6153
6154     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6155         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6156
6157     surface_blt_fbo(surface->resource.device, WINED3D_TEXF_POINT,
6158             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6159 }
6160
6161 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6162 {
6163     struct wined3d_device *device = surface->resource.device;
6164     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6165     HRESULT hr;
6166
6167     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6168
6169     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6170     {
6171         if (location == SFLAG_INTEXTURE)
6172         {
6173             struct wined3d_context *context = context_acquire(device, NULL);
6174             surface_load_ds_location(surface, context, location);
6175             context_release(context);
6176             return WINED3D_OK;
6177         }
6178         else
6179         {
6180             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6181             return WINED3DERR_INVALIDCALL;
6182         }
6183     }
6184
6185     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6186         location = SFLAG_INTEXTURE;
6187
6188     if (surface->flags & location)
6189     {
6190         TRACE("Location already up to date.\n");
6191
6192         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6193                 && surface_need_pbo(surface, gl_info))
6194             surface_load_pbo(surface, gl_info);
6195
6196         return WINED3D_OK;
6197     }
6198
6199     if (WARN_ON(d3d_surface))
6200     {
6201         DWORD required_access = resource_access_from_location(location);
6202         if ((surface->resource.access_flags & required_access) != required_access)
6203             WARN("Operation requires %#x access, but surface only has %#x.\n",
6204                     required_access, surface->resource.access_flags);
6205     }
6206
6207     if (!(surface->flags & SFLAG_LOCATIONS))
6208     {
6209         ERR("Surface %p does not have any up to date location.\n", surface);
6210         surface->flags |= SFLAG_LOST;
6211         return WINED3DERR_DEVICELOST;
6212     }
6213
6214     switch (location)
6215     {
6216         case SFLAG_INSYSMEM:
6217             surface_load_sysmem(surface, gl_info, rect);
6218             break;
6219
6220         case SFLAG_INDRAWABLE:
6221             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6222                 return hr;
6223             break;
6224
6225         case SFLAG_INRB_RESOLVED:
6226             surface_multisample_resolve(surface);
6227             break;
6228
6229         case SFLAG_INTEXTURE:
6230         case SFLAG_INSRGBTEX:
6231             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6232                 return hr;
6233             break;
6234
6235         default:
6236             ERR("Don't know how to handle location %#x.\n", location);
6237             break;
6238     }
6239
6240     if (!rect)
6241     {
6242         surface->flags |= location;
6243
6244         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6245             surface_evict_sysmem(surface);
6246     }
6247
6248     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6249             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6250     {
6251         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6252     }
6253
6254     return WINED3D_OK;
6255 }
6256
6257 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6258 {
6259     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6260
6261     /* Not on a swapchain - must be offscreen */
6262     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6263
6264     /* The front buffer is always onscreen */
6265     if (surface == swapchain->front_buffer) return FALSE;
6266
6267     /* If the swapchain is rendered to an FBO, the backbuffer is
6268      * offscreen, otherwise onscreen */
6269     return swapchain->render_to_fbo;
6270 }
6271
6272 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Context activation is done by the caller.
 * Nothing to release, since ffp_blit_alloc() allocates nothing. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6275
6276 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6277 /* Context activation is done by the caller. */
6278 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6279 {
6280     BYTE table[256][4];
6281     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) != 0;
6282     GLenum target;
6283
6284     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
6285         target = surface->container.u.texture->target;
6286     else
6287         target = surface->texture_target;
6288
6289     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6290
6291     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6292     GL_EXTCALL(glColorTableEXT(target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6293 }
6294
6295 /* Context activation is done by the caller. */
6296 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6297 {
6298     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6299     const struct wined3d_gl_info *gl_info = context->gl_info;
6300     GLenum target;
6301
6302     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
6303         target = surface->container.u.texture->target;
6304     else
6305         target = surface->texture_target;
6306
6307     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6308      * else the surface is converted in software at upload time in LoadLocation.
6309      */
6310     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6311             && gl_info->supported[EXT_PALETTED_TEXTURE])
6312         ffp_blit_p8_upload_palette(surface, gl_info);
6313
6314     gl_info->gl_ops.gl.p_glEnable(target);
6315     checkGLcall("glEnable(target)");
6316
6317     return WINED3D_OK;
6318 }
6319
6320 /* Context activation is done by the caller. */
6321 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6322 {
6323     gl_info->gl_ops.gl.p_glDisable(GL_TEXTURE_2D);
6324     checkGLcall("glDisable(GL_TEXTURE_2D)");
6325     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6326     {
6327         gl_info->gl_ops.gl.p_glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6328         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6329     }
6330     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6331     {
6332         gl_info->gl_ops.gl.p_glDisable(GL_TEXTURE_RECTANGLE_ARB);
6333         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6334     }
6335 }
6336
6337 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6338         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6339         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6340 {
6341     enum complex_fixup src_fixup;
6342
6343     switch (blit_op)
6344     {
6345         case WINED3D_BLIT_OP_COLOR_BLIT:
6346             if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
6347                 return FALSE;
6348
6349             src_fixup = get_complex_fixup(src_format->color_fixup);
6350             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6351             {
6352                 TRACE("Checking support for fixup:\n");
6353                 dump_color_fixup_desc(src_format->color_fixup);
6354             }
6355
6356             if (!is_identity_fixup(dst_format->color_fixup))
6357             {
6358                 TRACE("Destination fixups are not supported\n");
6359                 return FALSE;
6360             }
6361
6362             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6363             {
6364                 TRACE("P8 fixup supported\n");
6365                 return TRUE;
6366             }
6367
6368             /* We only support identity conversions. */
6369             if (is_identity_fixup(src_format->color_fixup))
6370             {
6371                 TRACE("[OK]\n");
6372                 return TRUE;
6373             }
6374
6375             TRACE("[FAILED]\n");
6376             return FALSE;
6377
6378         case WINED3D_BLIT_OP_COLOR_FILL:
6379             if (dst_pool == WINED3D_POOL_SYSTEM_MEM)
6380                 return FALSE;
6381
6382             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6383             {
6384                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6385                     return FALSE;
6386             }
6387             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6388             {
6389                 TRACE("Color fill not supported\n");
6390                 return FALSE;
6391             }
6392
6393             /* FIXME: We should reject color fills on formats with fixups,
6394              * but this would break P8 color fills for example. */
6395
6396             return TRUE;
6397
6398         case WINED3D_BLIT_OP_DEPTH_FILL:
6399             return TRUE;
6400
6401         default:
6402             TRACE("Unsupported blit_op=%d\n", blit_op);
6403             return FALSE;
6404     }
6405 }
6406
6407 /* Do not call while under the GL lock. */
6408 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6409         const RECT *dst_rect, const struct wined3d_color *color)
6410 {
6411     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6412     struct wined3d_fb_state fb = {&dst_surface, NULL};
6413
6414     device_clear_render_targets(device, 1, &fb, 1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6415
6416     return WINED3D_OK;
6417 }
6418
6419 /* Do not call while under the GL lock. */
6420 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6421         struct wined3d_surface *surface, const RECT *rect, float depth)
6422 {
6423     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6424     struct wined3d_fb_state fb = {NULL, surface};
6425
6426     device_clear_render_targets(device, 0, &fb, 1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6427
6428     return WINED3D_OK;
6429 }
6430
/* Blitter operation table for the fixed-function pipeline. Entries are
 * positional and must stay in the order of the struct blit_shader members. */
const struct blit_shader ffp_blit =  {
    ffp_blit_alloc,         /* no-op, always succeeds */
    ffp_blit_free,          /* no-op */
    ffp_blit_set,           /* upload P8 palette if needed, enable texture target */
    ffp_blit_unset,         /* disable texture targets */
    ffp_blit_supported,     /* capability check per blit operation */
    ffp_blit_color_fill,    /* color fill through a render target clear */
    ffp_blit_depth_fill,    /* depth fill through a Z buffer clear */
};
6440
6441 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6442 {
6443     return WINED3D_OK;
6444 }
6445
/* Context activation is done by the caller.
 * Nothing to release, since cpu_blit_alloc() allocates nothing. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Intentionally empty. */
}
6450
6451 /* Context activation is done by the caller. */
6452 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6453 {
6454     return WINED3D_OK;
6455 }
6456
/* Context activation is done by the caller.
 * Nothing to undo, since cpu_blit_set() changes no state. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* Intentionally empty. */
}
6461
6462 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6463         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6464         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6465 {
6466     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6467     {
6468         return TRUE;
6469     }
6470
6471     return FALSE;
6472 }
6473
6474 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6475         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6476         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6477 {
6478     UINT row_block_count;
6479     const BYTE *src_row;
6480     BYTE *dst_row;
6481     UINT x, y;
6482
6483     src_row = src_data;
6484     dst_row = dst_data;
6485
6486     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6487
6488     if (!flags)
6489     {
6490         for (y = 0; y < update_h; y += format->block_height)
6491         {
6492             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6493             src_row += src_pitch;
6494             dst_row += dst_pitch;
6495         }
6496
6497         return WINED3D_OK;
6498     }
6499
6500     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6501     {
6502         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6503
6504         switch (format->id)
6505         {
6506             case WINED3DFMT_DXT1:
6507                 for (y = 0; y < update_h; y += format->block_height)
6508                 {
6509                     struct block
6510                     {
6511                         WORD color[2];
6512                         BYTE control_row[4];
6513                     };
6514
6515                     const struct block *s = (const struct block *)src_row;
6516                     struct block *d = (struct block *)dst_row;
6517
6518                     for (x = 0; x < row_block_count; ++x)
6519                     {
6520                         d[x].color[0] = s[x].color[0];
6521                         d[x].color[1] = s[x].color[1];
6522                         d[x].control_row[0] = s[x].control_row[3];
6523                         d[x].control_row[1] = s[x].control_row[2];
6524                         d[x].control_row[2] = s[x].control_row[1];
6525                         d[x].control_row[3] = s[x].control_row[0];
6526                     }
6527                     src_row -= src_pitch;
6528                     dst_row += dst_pitch;
6529                 }
6530                 return WINED3D_OK;
6531
6532             case WINED3DFMT_DXT3:
6533                 for (y = 0; y < update_h; y += format->block_height)
6534                 {
6535                     struct block
6536                     {
6537                         WORD alpha_row[4];
6538                         WORD color[2];
6539                         BYTE control_row[4];
6540                     };
6541
6542                     const struct block *s = (const struct block *)src_row;
6543                     struct block *d = (struct block *)dst_row;
6544
6545                     for (x = 0; x < row_block_count; ++x)
6546                     {
6547                         d[x].alpha_row[0] = s[x].alpha_row[3];
6548                         d[x].alpha_row[1] = s[x].alpha_row[2];
6549                         d[x].alpha_row[2] = s[x].alpha_row[1];
6550                         d[x].alpha_row[3] = s[x].alpha_row[0];
6551                         d[x].color[0] = s[x].color[0];
6552                         d[x].color[1] = s[x].color[1];
6553                         d[x].control_row[0] = s[x].control_row[3];
6554                         d[x].control_row[1] = s[x].control_row[2];
6555                         d[x].control_row[2] = s[x].control_row[1];
6556                         d[x].control_row[3] = s[x].control_row[0];
6557                     }
6558                     src_row -= src_pitch;
6559                     dst_row += dst_pitch;
6560                 }
6561                 return WINED3D_OK;
6562
6563             default:
6564                 FIXME("Compressed flip not implemented for format %s.\n",
6565                         debug_d3dformat(format->id));
6566                 return E_NOTIMPL;
6567         }
6568     }
6569
6570     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6571             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6572
6573     return E_NOTIMPL;
6574 }
6575
6576 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6577         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6578         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
6579 {
6580     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6581     const struct wined3d_format *src_format, *dst_format;
6582     struct wined3d_surface *orig_src = src_surface;
6583     struct wined3d_map_desc dst_map, src_map;
6584     const BYTE *sbase = NULL;
6585     HRESULT hr = WINED3D_OK;
6586     const BYTE *sbuf;
6587     BYTE *dbuf;
6588     int x, y;
6589
6590     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6591             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6592             flags, fx, debug_d3dtexturefiltertype(filter));
6593
6594     if (src_surface == dst_surface)
6595     {
6596         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6597         src_map = dst_map;
6598         src_format = dst_surface->resource.format;
6599         dst_format = src_format;
6600     }
6601     else
6602     {
6603         dst_format = dst_surface->resource.format;
6604         if (src_surface)
6605         {
6606             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6607             {
6608                 src_surface = surface_convert_format(src_surface, dst_format->id);
6609                 if (!src_surface)
6610                 {
6611                     /* The conv function writes a FIXME */
6612                     WARN("Cannot convert source surface format to dest format.\n");
6613                     goto release;
6614                 }
6615             }
6616             wined3d_surface_map(src_surface, &src_map, NULL, WINED3D_MAP_READONLY);
6617             src_format = src_surface->resource.format;
6618         }
6619         else
6620         {
6621             src_format = dst_format;
6622         }
6623
6624         wined3d_surface_map(dst_surface, &dst_map, dst_rect, 0);
6625     }
6626
6627     bpp = dst_surface->resource.format->byte_count;
6628     srcheight = src_rect->bottom - src_rect->top;
6629     srcwidth = src_rect->right - src_rect->left;
6630     dstheight = dst_rect->bottom - dst_rect->top;
6631     dstwidth = dst_rect->right - dst_rect->left;
6632     width = (dst_rect->right - dst_rect->left) * bpp;
6633
6634     if (src_surface)
6635         sbase = (BYTE *)src_map.data
6636                 + ((src_rect->top / src_format->block_height) * src_map.row_pitch)
6637                 + ((src_rect->left / src_format->block_width) * src_format->block_byte_count);
6638     if (src_surface != dst_surface)
6639         dbuf = dst_map.data;
6640     else
6641         dbuf = (BYTE *)dst_map.data
6642                 + ((dst_rect->top / dst_format->block_height) * dst_map.row_pitch)
6643                 + ((dst_rect->left / dst_format->block_width) * dst_format->block_byte_count);
6644
6645     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6646     {
6647         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6648
6649         if (src_surface == dst_surface)
6650         {
6651             FIXME("Only plain blits supported on compressed surfaces.\n");
6652             hr = E_NOTIMPL;
6653             goto release;
6654         }
6655
6656         if (srcheight != dstheight || srcwidth != dstwidth)
6657         {
6658             WARN("Stretching not supported on compressed surfaces.\n");
6659             hr = WINED3DERR_INVALIDCALL;
6660             goto release;
6661         }
6662
6663         if (!surface_check_block_align(src_surface, src_rect))
6664         {
6665             WARN("Source rectangle not block-aligned.\n");
6666             hr = WINED3DERR_INVALIDCALL;
6667             goto release;
6668         }
6669
6670         if (!surface_check_block_align(dst_surface, dst_rect))
6671         {
6672             WARN("Destination rectangle not block-aligned.\n");
6673             hr = WINED3DERR_INVALIDCALL;
6674             goto release;
6675         }
6676
6677         hr = surface_cpu_blt_compressed(sbase, dbuf,
6678                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6679                 src_format, flags, fx);
6680         goto release;
6681     }
6682
6683     /* First, all the 'source-less' blits */
6684     if (flags & WINEDDBLT_COLORFILL)
6685     {
6686         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6687         flags &= ~WINEDDBLT_COLORFILL;
6688     }
6689
6690     if (flags & WINEDDBLT_DEPTHFILL)
6691     {
6692         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6693     }
6694     if (flags & WINEDDBLT_ROP)
6695     {
6696         /* Catch some degenerate cases here. */
6697         switch (fx->dwROP)
6698         {
6699             case BLACKNESS:
6700                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6701                 break;
6702             case 0xaa0029: /* No-op */
6703                 break;
6704             case WHITENESS:
6705                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6706                 break;
6707             case SRCCOPY: /* Well, we do that below? */
6708                 break;
6709             default:
6710                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6711                 goto error;
6712         }
6713         flags &= ~WINEDDBLT_ROP;
6714     }
6715     if (flags & WINEDDBLT_DDROPS)
6716     {
6717         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6718     }
6719     /* Now the 'with source' blits. */
6720     if (src_surface)
6721     {
6722         int sx, xinc, sy, yinc;
6723
6724         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6725             goto release;
6726
6727         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT
6728                 && (srcwidth != dstwidth || srcheight != dstheight))
6729         {
6730             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6731             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6732         }
6733
6734         xinc = (srcwidth << 16) / dstwidth;
6735         yinc = (srcheight << 16) / dstheight;
6736
6737         if (!flags)
6738         {
6739             /* No effects, we can cheat here. */
6740             if (dstwidth == srcwidth)
6741             {
6742                 if (dstheight == srcheight)
6743                 {
6744                     /* No stretching in either direction. This needs to be as
6745                      * fast as possible. */
6746                     sbuf = sbase;
6747
6748                     /* Check for overlapping surfaces. */
6749                     if (src_surface != dst_surface || dst_rect->top < src_rect->top
6750                             || dst_rect->right <= src_rect->left || src_rect->right <= dst_rect->left)
6751                     {
6752                         /* No overlap, or dst above src, so copy from top downwards. */
6753                         for (y = 0; y < dstheight; ++y)
6754                         {
6755                             memcpy(dbuf, sbuf, width);
6756                             sbuf += src_map.row_pitch;
6757                             dbuf += dst_map.row_pitch;
6758                         }
6759                     }
6760                     else if (dst_rect->top > src_rect->top)
6761                     {
6762                         /* Copy from bottom upwards. */
6763                         sbuf += src_map.row_pitch * dstheight;
6764                         dbuf += dst_map.row_pitch * dstheight;
6765                         for (y = 0; y < dstheight; ++y)
6766                         {
6767                             sbuf -= src_map.row_pitch;
6768                             dbuf -= dst_map.row_pitch;
6769                             memcpy(dbuf, sbuf, width);
6770                         }
6771                     }
6772                     else
6773                     {
6774                         /* Src and dst overlapping on the same line, use memmove. */
6775                         for (y = 0; y < dstheight; ++y)
6776                         {
6777                             memmove(dbuf, sbuf, width);
6778                             sbuf += src_map.row_pitch;
6779                             dbuf += dst_map.row_pitch;
6780                         }
6781                     }
6782                 }
6783                 else
6784                 {
6785                     /* Stretching in y direction only. */
6786                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6787                     {
6788                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6789                         memcpy(dbuf, sbuf, width);
6790                         dbuf += dst_map.row_pitch;
6791                     }
6792                 }
6793             }
6794             else
6795             {
6796                 /* Stretching in X direction. */
6797                 int last_sy = -1;
6798                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6799                 {
6800                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6801
6802                     if ((sy >> 16) == (last_sy >> 16))
6803                     {
6804                         /* This source row is the same as last source row -
6805                          * Copy the already stretched row. */
6806                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6807                     }
6808                     else
6809                     {
6810 #define STRETCH_ROW(type) \
6811 do { \
6812     const type *s = (const type *)sbuf; \
6813     type *d = (type *)dbuf; \
6814     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6815         d[x] = s[sx >> 16]; \
6816 } while(0)
6817
6818                         switch(bpp)
6819                         {
6820                             case 1:
6821                                 STRETCH_ROW(BYTE);
6822                                 break;
6823                             case 2:
6824                                 STRETCH_ROW(WORD);
6825                                 break;
6826                             case 4:
6827                                 STRETCH_ROW(DWORD);
6828                                 break;
6829                             case 3:
6830                             {
6831                                 const BYTE *s;
6832                                 BYTE *d = dbuf;
6833                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6834                                 {
6835                                     DWORD pixel;
6836
6837                                     s = sbuf + 3 * (sx >> 16);
6838                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6839                                     d[0] = (pixel      ) & 0xff;
6840                                     d[1] = (pixel >>  8) & 0xff;
6841                                     d[2] = (pixel >> 16) & 0xff;
6842                                     d += 3;
6843                                 }
6844                                 break;
6845                             }
6846                             default:
6847                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6848                                 hr = WINED3DERR_NOTAVAILABLE;
6849                                 goto error;
6850                         }
6851 #undef STRETCH_ROW
6852                     }
6853                     dbuf += dst_map.row_pitch;
6854                     last_sy = sy;
6855                 }
6856             }
6857         }
6858         else
6859         {
6860             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6861             DWORD keylow = 0xffffffff, keyhigh = 0, keymask = 0xffffffff;
6862             DWORD destkeylow = 0x0, destkeyhigh = 0xffffffff, destkeymask = 0xffffffff;
6863             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6864             {
6865                 /* The color keying flags are checked for correctness in ddraw */
6866                 if (flags & WINEDDBLT_KEYSRC)
6867                 {
6868                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6869                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6870                 }
6871                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6872                 {
6873                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6874                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6875                 }
6876
6877                 if (flags & WINEDDBLT_KEYDEST)
6878                 {
6879                     /* Destination color keys are taken from the source surface! */
6880                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6881                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6882                 }
6883                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6884                 {
6885                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6886                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6887                 }
6888
6889                 if (bpp == 1)
6890                 {
6891                     keymask = 0xff;
6892                 }
6893                 else
6894                 {
6895                     DWORD masks[3];
6896                     get_color_masks(src_format, masks);
6897                     keymask = masks[0]
6898                             | masks[1]
6899                             | masks[2];
6900                 }
6901                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6902             }
6903
6904             if (flags & WINEDDBLT_DDFX)
6905             {
6906                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6907                 LONG tmpxy;
6908                 dTopLeft     = dbuf;
6909                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6910                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6911                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6912
6913                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6914                 {
6915                     /* I don't think we need to do anything about this flag */
6916                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6917                 }
6918                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6919                 {
6920                     tmp          = dTopRight;
6921                     dTopRight    = dTopLeft;
6922                     dTopLeft     = tmp;
6923                     tmp          = dBottomRight;
6924                     dBottomRight = dBottomLeft;
6925                     dBottomLeft  = tmp;
6926                     dstxinc = dstxinc * -1;
6927                 }
6928                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6929                 {
6930                     tmp          = dTopLeft;
6931                     dTopLeft     = dBottomLeft;
6932                     dBottomLeft  = tmp;
6933                     tmp          = dTopRight;
6934                     dTopRight    = dBottomRight;
6935                     dBottomRight = tmp;
6936                     dstyinc = dstyinc * -1;
6937                 }
6938                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6939                 {
6940                     /* I don't think we need to do anything about this flag */
6941                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6942                 }
6943                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6944                 {
6945                     tmp          = dBottomRight;
6946                     dBottomRight = dTopLeft;
6947                     dTopLeft     = tmp;
6948                     tmp          = dBottomLeft;
6949                     dBottomLeft  = dTopRight;
6950                     dTopRight    = tmp;
6951                     dstxinc = dstxinc * -1;
6952                     dstyinc = dstyinc * -1;
6953                 }
6954                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6955                 {
6956                     tmp          = dTopLeft;
6957                     dTopLeft     = dBottomLeft;
6958                     dBottomLeft  = dBottomRight;
6959                     dBottomRight = dTopRight;
6960                     dTopRight    = tmp;
6961                     tmpxy   = dstxinc;
6962                     dstxinc = dstyinc;
6963                     dstyinc = tmpxy;
6964                     dstxinc = dstxinc * -1;
6965                 }
6966                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6967                 {
6968                     tmp          = dTopLeft;
6969                     dTopLeft     = dTopRight;
6970                     dTopRight    = dBottomRight;
6971                     dBottomRight = dBottomLeft;
6972                     dBottomLeft  = tmp;
6973                     tmpxy   = dstxinc;
6974                     dstxinc = dstyinc;
6975                     dstyinc = tmpxy;
6976                     dstyinc = dstyinc * -1;
6977                 }
6978                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6979                 {
6980                     /* I don't think we need to do anything about this flag */
6981                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6982                 }
6983                 dbuf = dTopLeft;
6984                 flags &= ~(WINEDDBLT_DDFX);
6985             }
6986
6987 #define COPY_COLORKEY_FX(type) \
6988 do { \
6989     const type *s; \
6990     type *d = (type *)dbuf, *dx, tmp; \
6991     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6992     { \
6993         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
6994         dx = d; \
6995         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6996         { \
6997             tmp = s[sx >> 16]; \
6998             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6999                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7000             { \
7001                 dx[0] = tmp; \
7002             } \
7003             dx = (type *)(((BYTE *)dx) + dstxinc); \
7004         } \
7005         d = (type *)(((BYTE *)d) + dstyinc); \
7006     } \
7007 } while(0)
7008
7009             switch (bpp)
7010             {
7011                 case 1:
7012                     COPY_COLORKEY_FX(BYTE);
7013                     break;
7014                 case 2:
7015                     COPY_COLORKEY_FX(WORD);
7016                     break;
7017                 case 4:
7018                     COPY_COLORKEY_FX(DWORD);
7019                     break;
7020                 case 3:
7021                 {
7022                     const BYTE *s;
7023                     BYTE *d = dbuf, *dx;
7024                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7025                     {
7026                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7027                         dx = d;
7028                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7029                         {
7030                             DWORD pixel, dpixel = 0;
7031                             s = sbuf + 3 * (sx>>16);
7032                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7033                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7034                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7035                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7036                             {
7037                                 dx[0] = (pixel      ) & 0xff;
7038                                 dx[1] = (pixel >>  8) & 0xff;
7039                                 dx[2] = (pixel >> 16) & 0xff;
7040                             }
7041                             dx += dstxinc;
7042                         }
7043                         d += dstyinc;
7044                     }
7045                     break;
7046                 }
7047                 default:
7048                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7049                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7050                     hr = WINED3DERR_NOTAVAILABLE;
7051                     goto error;
7052 #undef COPY_COLORKEY_FX
7053             }
7054         }
7055     }
7056
7057 error:
7058     if (flags && FIXME_ON(d3d_surface))
7059     {
7060         FIXME("\tUnsupported flags: %#x.\n", flags);
7061     }
7062
7063 release:
7064     wined3d_surface_unmap(dst_surface);
7065     if (src_surface && src_surface != dst_surface)
7066         wined3d_surface_unmap(src_surface);
7067     /* Release the converted surface, if any. */
7068     if (src_surface && src_surface != orig_src)
7069         wined3d_surface_decref(src_surface);
7070
7071     return hr;
7072 }
7073
7074 /* Do not call while under the GL lock. */
7075 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7076         const RECT *dst_rect, const struct wined3d_color *color)
7077 {
7078     static const RECT src_rect;
7079     WINEDDBLTFX BltFx;
7080
7081     memset(&BltFx, 0, sizeof(BltFx));
7082     BltFx.dwSize = sizeof(BltFx);
7083     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7084     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7085             WINEDDBLT_COLORFILL, &BltFx, WINED3D_TEXF_POINT);
7086 }
7087
7088 /* Do not call while under the GL lock. */
7089 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7090         struct wined3d_surface *surface, const RECT *rect, float depth)
7091 {
7092     FIXME("Depth filling not implemented by cpu_blit.\n");
7093     return WINED3DERR_INVALIDCALL;
7094 }
7095
7096 const struct blit_shader cpu_blit =  {
7097     cpu_blit_alloc,
7098     cpu_blit_free,
7099     cpu_blit_set,
7100     cpu_blit_unset,
7101     cpu_blit_supported,
7102     cpu_blit_color_fill,
7103     cpu_blit_depth_fill,
7104 };
7105
7106 static HRESULT surface_init(struct wined3d_surface *surface, enum wined3d_surface_type surface_type,
7107         UINT alignment, UINT width, UINT height, enum wined3d_multisample_type multisample_type,
7108         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7109         enum wined3d_pool pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7110 {
7111     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7112     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7113     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7114     unsigned int resource_size;
7115     HRESULT hr;
7116
7117     if (multisample_quality > 0)
7118     {
7119         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7120         multisample_quality = 0;
7121     }
7122
7123     /* Quick lockable sanity check.
7124      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7125      * this function is too deep to need to care about things like this.
7126      * Levels need to be checked too, since they all affect what can be done. */
7127     switch (pool)
7128     {
7129         case WINED3D_POOL_SCRATCH:
7130             if (!lockable)
7131             {
7132                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7133                         "which are mutually exclusive, setting lockable to TRUE.\n");
7134                 lockable = TRUE;
7135             }
7136             break;
7137
7138         case WINED3D_POOL_SYSTEM_MEM:
7139             if (!lockable)
7140                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7141             break;
7142
7143         case WINED3D_POOL_MANAGED:
7144             if (usage & WINED3DUSAGE_DYNAMIC)
7145                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7146             break;
7147
7148         case WINED3D_POOL_DEFAULT:
7149             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7150                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7151             break;
7152
7153         default:
7154             FIXME("Unknown pool %#x.\n", pool);
7155             break;
7156     };
7157
7158     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3D_POOL_DEFAULT)
7159         FIXME("Trying to create a render target that isn't in the default pool.\n");
7160
7161     /* FIXME: Check that the format is supported by the device. */
7162
7163     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7164     if (!resource_size)
7165         return WINED3DERR_INVALIDCALL;
7166
7167     surface->surface_type = surface_type;
7168
7169     switch (surface_type)
7170     {
7171         case WINED3D_SURFACE_TYPE_OPENGL:
7172             surface->surface_ops = &surface_ops;
7173             break;
7174
7175         case WINED3D_SURFACE_TYPE_GDI:
7176             surface->surface_ops = &gdi_surface_ops;
7177             break;
7178
7179         default:
7180             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7181             return WINED3DERR_INVALIDCALL;
7182     }
7183
7184     hr = resource_init(&surface->resource, device, WINED3D_RTYPE_SURFACE, format,
7185             multisample_type, multisample_quality, usage, pool, width, height, 1,
7186             resource_size, parent, parent_ops, &surface_resource_ops);
7187     if (FAILED(hr))
7188     {
7189         WARN("Failed to initialize resource, returning %#x.\n", hr);
7190         return hr;
7191     }
7192
7193     /* "Standalone" surface. */
7194     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7195
7196     list_init(&surface->overlays);
7197
7198     /* Flags */
7199     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7200     if (flags & WINED3D_SURFACE_DISCARD)
7201         surface->flags |= SFLAG_DISCARD;
7202     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7203         surface->flags |= SFLAG_PIN_SYSMEM;
7204     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7205         surface->flags |= SFLAG_LOCKABLE;
7206     /* I'm not sure if this qualifies as a hack or as an optimization. It
7207      * seems reasonable to assume that lockable render targets will get
7208      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7209      * creation. However, the other reason we want to do this is that several
7210      * ddraw applications access surface memory while the surface isn't
7211      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7212      * future locks prevents these from crashing. */
7213     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7214         surface->flags |= SFLAG_DYNLOCK;
7215
7216     /* Mark the texture as dirty so that it gets loaded first time around. */
7217     surface_add_dirty_rect(surface, NULL);
7218     list_init(&surface->renderbuffers);
7219
7220     TRACE("surface %p, memory %p, size %u\n",
7221             surface, surface->resource.allocatedMemory, surface->resource.size);
7222
7223     /* Call the private setup routine */
7224     hr = surface->surface_ops->surface_private_setup(surface);
7225     if (FAILED(hr))
7226     {
7227         ERR("Private setup failed, returning %#x\n", hr);
7228         surface_cleanup(surface);
7229         return hr;
7230     }
7231
7232     /* Similar to lockable rendertargets above, creating the DIB section
7233      * during surface initialization prevents the sysmem pointer from changing
7234      * after a wined3d_surface_getdc() call. */
7235     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7236             && SUCCEEDED(surface_create_dib_section(surface)))
7237     {
7238         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7239         surface->resource.heapMemory = NULL;
7240         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7241     }
7242
7243     return hr;
7244 }
7245
7246 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7247         enum wined3d_format_id format_id, DWORD usage, enum wined3d_pool pool,
7248         enum wined3d_multisample_type multisample_type, DWORD multisample_quality,
7249         enum wined3d_surface_type surface_type, DWORD flags, void *parent,
7250         const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7251 {
7252     struct wined3d_surface *object;
7253     HRESULT hr;
7254
7255     TRACE("device %p, width %u, height %u, format %s\n",
7256             device, width, height, debug_d3dformat(format_id));
7257     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7258             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7259     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7260
7261     if (surface_type == WINED3D_SURFACE_TYPE_OPENGL && !device->adapter)
7262     {
7263         ERR("OpenGL surfaces are not available without OpenGL.\n");
7264         return WINED3DERR_NOTAVAILABLE;
7265     }
7266
7267     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7268     if (!object)
7269     {
7270         ERR("Failed to allocate surface memory.\n");
7271         return WINED3DERR_OUTOFVIDEOMEMORY;
7272     }
7273
7274     if (FAILED(hr = surface_init(object, surface_type, device->surface_alignment, width, height,
7275             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops)))
7276     {
7277         WARN("Failed to initialize surface, returning %#x.\n", hr);
7278         HeapFree(GetProcessHeap(), 0, object);
7279         return hr;
7280     }
7281
7282     TRACE("Created surface %p.\n", object);
7283     *surface = object;
7284
7285     return hr;
7286 }