Release 1.5.29.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
35 WINE_DECLARE_DEBUG_CHANNEL(d3d);
36
37 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
38         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
39         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter);
40 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
41         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
42         enum wined3d_texture_filter_type filter);
43
44 static void surface_cleanup(struct wined3d_surface *surface)
45 {
46     struct wined3d_surface *overlay, *cur;
47
48     TRACE("surface %p.\n", surface);
49
50     if (surface->texture_name || (surface->flags & SFLAG_PBO)
51              || surface->rb_multisample || surface->rb_resolved
52              || !list_empty(&surface->renderbuffers))
53     {
54         struct wined3d_renderbuffer_entry *entry, *entry2;
55         const struct wined3d_gl_info *gl_info;
56         struct wined3d_context *context;
57
58         context = context_acquire(surface->resource.device, NULL);
59         gl_info = context->gl_info;
60
61         if (surface->texture_name)
62         {
63             TRACE("Deleting texture %u.\n", surface->texture_name);
64             gl_info->gl_ops.gl.p_glDeleteTextures(1, &surface->texture_name);
65         }
66
67         if (surface->flags & SFLAG_PBO)
68         {
69             TRACE("Deleting PBO %u.\n", surface->pbo);
70             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
71         }
72
73         if (surface->rb_multisample)
74         {
75             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
76             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
77         }
78
79         if (surface->rb_resolved)
80         {
81             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
82             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
83         }
84
85         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
86         {
87             TRACE("Deleting renderbuffer %u.\n", entry->id);
88             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
89             HeapFree(GetProcessHeap(), 0, entry);
90         }
91
92         context_release(context);
93     }
94
95     if (surface->flags & SFLAG_DIBSECTION)
96     {
97         DeleteDC(surface->hDC);
98         DeleteObject(surface->dib.DIBsection);
99         surface->dib.bitmap_data = NULL;
100         surface->resource.allocatedMemory = NULL;
101     }
102
103     if (surface->flags & SFLAG_USERPTR)
104         wined3d_surface_set_mem(surface, NULL);
105     if (surface->overlay_dest)
106         list_remove(&surface->overlay_entry);
107
108     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
109     {
110         list_remove(&overlay->overlay_entry);
111         overlay->overlay_dest = NULL;
112     }
113
114     resource_cleanup(&surface->resource);
115 }
116
117 void surface_update_draw_binding(struct wined3d_surface *surface)
118 {
119     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
120         surface->draw_binding = SFLAG_INDRAWABLE;
121     else if (surface->resource.multisample_type)
122         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
123     else
124         surface->draw_binding = SFLAG_INTEXTURE;
125 }
126
127 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
128 {
129     TRACE("surface %p, container %p.\n", surface, container);
130
131     if (!container && type != WINED3D_CONTAINER_NONE)
132         ERR("Setting NULL container of type %#x.\n", type);
133
134     if (type == WINED3D_CONTAINER_SWAPCHAIN)
135     {
136         surface->get_drawable_size = get_drawable_size_swapchain;
137     }
138     else
139     {
140         switch (wined3d_settings.offscreen_rendering_mode)
141         {
142             case ORM_FBO:
143                 surface->get_drawable_size = get_drawable_size_fbo;
144                 break;
145
146             case ORM_BACKBUFFER:
147                 surface->get_drawable_size = get_drawable_size_backbuffer;
148                 break;
149
150             default:
151                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
152                 return;
153         }
154     }
155
156     surface->container.type = type;
157     surface->container.u.base = container;
158     surface_update_draw_binding(surface);
159 }
160
161 struct blt_info
162 {
163     GLenum binding;
164     GLenum bind_target;
165     enum tex_types tex_type;
166     GLfloat coords[4][3];
167 };
168
/* A rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
176
177 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
178 {
179     f->l = ((r->left * 2.0f) / w) - 1.0f;
180     f->t = ((r->top * 2.0f) / h) - 1.0f;
181     f->r = ((r->right * 2.0f) / w) - 1.0f;
182     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
183 }
184
185 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
186 {
187     GLfloat (*coords)[3] = info->coords;
188     struct float_rect f;
189
190     switch (target)
191     {
192         default:
193             FIXME("Unsupported texture target %#x\n", target);
194             /* Fall back to GL_TEXTURE_2D */
195         case GL_TEXTURE_2D:
196             info->binding = GL_TEXTURE_BINDING_2D;
197             info->bind_target = GL_TEXTURE_2D;
198             info->tex_type = tex_2d;
199             coords[0][0] = (float)rect->left / w;
200             coords[0][1] = (float)rect->top / h;
201             coords[0][2] = 0.0f;
202
203             coords[1][0] = (float)rect->right / w;
204             coords[1][1] = (float)rect->top / h;
205             coords[1][2] = 0.0f;
206
207             coords[2][0] = (float)rect->left / w;
208             coords[2][1] = (float)rect->bottom / h;
209             coords[2][2] = 0.0f;
210
211             coords[3][0] = (float)rect->right / w;
212             coords[3][1] = (float)rect->bottom / h;
213             coords[3][2] = 0.0f;
214             break;
215
216         case GL_TEXTURE_RECTANGLE_ARB:
217             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
218             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
219             info->tex_type = tex_rect;
220             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
221             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
222             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
223             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
224             break;
225
226         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
227             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
228             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
229             info->tex_type = tex_cube;
230             cube_coords_float(rect, w, h, &f);
231
232             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
233             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
234             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
235             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
236             break;
237
238         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
239             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
240             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
241             info->tex_type = tex_cube;
242             cube_coords_float(rect, w, h, &f);
243
244             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
245             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
246             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
247             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
248             break;
249
250         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
251             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
252             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
253             info->tex_type = tex_cube;
254             cube_coords_float(rect, w, h, &f);
255
256             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
257             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
258             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
259             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
260             break;
261
262         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
263             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
264             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
265             info->tex_type = tex_cube;
266             cube_coords_float(rect, w, h, &f);
267
268             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
269             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
270             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
271             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
272             break;
273
274         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
275             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
276             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
277             info->tex_type = tex_cube;
278             cube_coords_float(rect, w, h, &f);
279
280             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
281             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
282             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
283             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
284             break;
285
286         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
287             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
288             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
289             info->tex_type = tex_cube;
290             cube_coords_float(rect, w, h, &f);
291
292             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
293             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
294             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
295             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
296             break;
297     }
298 }
299
300 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
301 {
302     if (rect_in)
303         *rect_out = *rect_in;
304     else
305     {
306         rect_out->left = 0;
307         rect_out->top = 0;
308         rect_out->right = surface->resource.width;
309         rect_out->bottom = surface->resource.height;
310     }
311 }
312
313 /* Context activation is done by the caller. */
314 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
315         const RECT *src_rect, const RECT *dst_rect, enum wined3d_texture_filter_type filter)
316 {
317     const struct wined3d_gl_info *gl_info = context->gl_info;
318     struct blt_info info;
319
320     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
321
322     gl_info->gl_ops.gl.p_glEnable(info.bind_target);
323     checkGLcall("glEnable(bind_target)");
324
325     context_bind_texture(context, info.bind_target, src_surface->texture_name);
326
327     /* Filtering for StretchRect */
328     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
329             wined3d_gl_mag_filter(magLookup, filter));
330     checkGLcall("glTexParameteri");
331     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
332             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
333     checkGLcall("glTexParameteri");
334     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
335     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
336     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
337         gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
338     gl_info->gl_ops.gl.p_glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
339     checkGLcall("glTexEnvi");
340
341     /* Draw a quad */
342     gl_info->gl_ops.gl.p_glBegin(GL_TRIANGLE_STRIP);
343     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[0]);
344     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->left, dst_rect->top);
345
346     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[1]);
347     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->right, dst_rect->top);
348
349     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[2]);
350     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->left, dst_rect->bottom);
351
352     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[3]);
353     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->right, dst_rect->bottom);
354     gl_info->gl_ops.gl.p_glEnd();
355
356     /* Unbind the texture */
357     context_bind_texture(context, info.bind_target, 0);
358
359     /* We changed the filtering settings on the texture. Inform the
360      * container about this to get the filters reset properly next draw. */
361     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
362     {
363         struct wined3d_texture *texture = src_surface->container.u.texture;
364         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3D_TEXF_POINT;
365         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3D_TEXF_POINT;
366         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3D_TEXF_NONE;
367         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
368     }
369 }
370
371 /* Works correctly only for <= 4 bpp formats. */
372 static void get_color_masks(const struct wined3d_format *format, DWORD *masks)
373 {
374     masks[0] = ((1 << format->red_size) - 1) << format->red_offset;
375     masks[1] = ((1 << format->green_size) - 1) << format->green_offset;
376     masks[2] = ((1 << format->blue_size) - 1) << format->blue_offset;
377 }
378
379 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
380 {
381     const struct wined3d_format *format = surface->resource.format;
382     SYSTEM_INFO sysInfo;
383     BITMAPINFO *b_info;
384     int extraline = 0;
385     DWORD *masks;
386
387     TRACE("surface %p.\n", surface);
388
389     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
390     {
391         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
392         return WINED3DERR_INVALIDCALL;
393     }
394
395     switch (format->byte_count)
396     {
397         case 2:
398         case 4:
399             /* Allocate extra space to store the RGB bit masks. */
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
401             break;
402
403         case 3:
404             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
405             break;
406
407         default:
408             /* Allocate extra space for a palette. */
409             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
410                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
411             break;
412     }
413
414     if (!b_info)
415         return E_OUTOFMEMORY;
416
417     /* Some applications access the surface in via DWORDs, and do not take
418      * the necessary care at the end of the surface. So we need at least
419      * 4 extra bytes at the end of the surface. Check against the page size,
420      * if the last page used for the surface has at least 4 spare bytes we're
421      * safe, otherwise add an extra line to the DIB section. */
422     GetSystemInfo(&sysInfo);
423     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
424     {
425         extraline = 1;
426         TRACE("Adding an extra line to the DIB section.\n");
427     }
428
429     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
430     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
431     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
432     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
433     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
434             * wined3d_surface_get_pitch(surface);
435     b_info->bmiHeader.biPlanes = 1;
436     b_info->bmiHeader.biBitCount = format->byte_count * 8;
437
438     b_info->bmiHeader.biXPelsPerMeter = 0;
439     b_info->bmiHeader.biYPelsPerMeter = 0;
440     b_info->bmiHeader.biClrUsed = 0;
441     b_info->bmiHeader.biClrImportant = 0;
442
443     /* Get the bit masks */
444     masks = (DWORD *)b_info->bmiColors;
445     switch (surface->resource.format->id)
446     {
447         case WINED3DFMT_B8G8R8_UNORM:
448             b_info->bmiHeader.biCompression = BI_RGB;
449             break;
450
451         case WINED3DFMT_B5G5R5X1_UNORM:
452         case WINED3DFMT_B5G5R5A1_UNORM:
453         case WINED3DFMT_B4G4R4A4_UNORM:
454         case WINED3DFMT_B4G4R4X4_UNORM:
455         case WINED3DFMT_B2G3R3_UNORM:
456         case WINED3DFMT_B2G3R3A8_UNORM:
457         case WINED3DFMT_R10G10B10A2_UNORM:
458         case WINED3DFMT_R8G8B8A8_UNORM:
459         case WINED3DFMT_R8G8B8X8_UNORM:
460         case WINED3DFMT_B10G10R10A2_UNORM:
461         case WINED3DFMT_B5G6R5_UNORM:
462         case WINED3DFMT_R16G16B16A16_UNORM:
463             b_info->bmiHeader.biCompression = BI_BITFIELDS;
464             get_color_masks(format, masks);
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             break;
471     }
472
473     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
474             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
475             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
476     surface->dib.DIBsection = CreateDIBSection(0, b_info, DIB_RGB_COLORS, &surface->dib.bitmap_data, 0, 0);
477
478     if (!surface->dib.DIBsection)
479     {
480         ERR("Failed to create DIB section.\n");
481         HeapFree(GetProcessHeap(), 0, b_info);
482         return HRESULT_FROM_WIN32(GetLastError());
483     }
484
485     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
486     /* Copy the existing surface to the dib section. */
487     if (surface->resource.allocatedMemory)
488     {
489         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
490                 surface->resource.height * wined3d_surface_get_pitch(surface));
491     }
492     else
493     {
494         /* This is to make maps read the GL texture although memory is allocated. */
495         surface->flags &= ~SFLAG_INSYSMEM;
496     }
497     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
498
499     HeapFree(GetProcessHeap(), 0, b_info);
500
501     /* Now allocate a DC. */
502     surface->hDC = CreateCompatibleDC(0);
503     SelectObject(surface->hDC, surface->dib.DIBsection);
504     TRACE("Using wined3d palette %p.\n", surface->palette);
505     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
506
507     surface->flags |= SFLAG_DIBSECTION;
508
509     return WINED3D_OK;
510 }
511
512 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
513 {
514     if (surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
515         return FALSE;
516     if (!(surface->flags & SFLAG_DYNLOCK))
517         return FALSE;
518     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
519         return FALSE;
520     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
521         return FALSE;
522
523     return TRUE;
524 }
525
526 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
527 {
528     struct wined3d_context *context;
529     GLenum error;
530
531     context = context_acquire(surface->resource.device, NULL);
532
533     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
534     error = gl_info->gl_ops.gl.p_glGetError();
535     if (!surface->pbo || error != GL_NO_ERROR)
536         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
537
538     TRACE("Binding PBO %u.\n", surface->pbo);
539
540     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
541     checkGLcall("glBindBufferARB");
542
543     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
544             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
545     checkGLcall("glBufferDataARB");
546
547     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
548     checkGLcall("glBindBufferARB");
549
550     /* We don't need the system memory anymore and we can't even use it for PBOs. */
551     if (!(surface->flags & SFLAG_CLIENT))
552     {
553         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
554         surface->resource.heapMemory = NULL;
555     }
556     surface->resource.allocatedMemory = NULL;
557     surface->flags |= SFLAG_PBO;
558     context_release(context);
559 }
560
561 static void surface_prepare_system_memory(struct wined3d_surface *surface)
562 {
563     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
564
565     TRACE("surface %p.\n", surface);
566
567     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
568         surface_load_pbo(surface, gl_info);
569     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
570     {
571         /* Whatever surface we have, make sure that there is memory allocated
572          * for the downloaded copy, or a PBO to map. */
573         if (!surface->resource.heapMemory)
574             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
575
576         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
577                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
578
579         if (surface->flags & SFLAG_INSYSMEM)
580             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
581     }
582 }
583
584 static void surface_evict_sysmem(struct wined3d_surface *surface)
585 {
586     if (surface->resource.map_count || (surface->flags & SFLAG_DONOTFREE))
587         return;
588
589     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
590     surface->resource.allocatedMemory = NULL;
591     surface->resource.heapMemory = NULL;
592     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
593 }
594
595 /* Context activation is done by the caller. */
596 static void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
597 {
598     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
599
600     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
601     {
602         struct wined3d_texture *texture = surface->container.u.texture;
603
604         TRACE("Passing to container (%p).\n", texture);
605         texture->texture_ops->texture_bind(texture, context, srgb);
606     }
607     else
608     {
609         const struct wined3d_gl_info *gl_info = context->gl_info;
610
611         if (surface->texture_level)
612         {
613             ERR("Standalone surface %p is non-zero texture level %u.\n",
614                     surface, surface->texture_level);
615         }
616
617         if (srgb)
618             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
619
620         if (!surface->texture_name)
621         {
622             gl_info->gl_ops.gl.p_glGenTextures(1, &surface->texture_name);
623             checkGLcall("glGenTextures");
624
625             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
626
627             context_bind_texture(context, surface->texture_target, surface->texture_name);
628             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
629             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
630             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
631             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
632             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
633             checkGLcall("glTexParameteri");
634         }
635         else
636         {
637             context_bind_texture(context, surface->texture_target, surface->texture_name);
638         }
639     }
640 }
641
642 /* Context activation is done by the caller. */
643 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
644         struct wined3d_context *context, BOOL srgb)
645 {
646     struct wined3d_device *device = surface->resource.device;
647     DWORD active_sampler;
648
649     /* We don't need a specific texture unit, but after binding the texture
650      * the current unit is dirty. Read the unit back instead of switching to
651      * 0, this avoids messing around with the state manager's GL states. The
652      * current texture unit should always be a valid one.
653      *
654      * To be more specific, this is tricky because we can implicitly be
655      * called from sampler() in state.c. This means we can't touch anything
656      * other than whatever happens to be the currently active texture, or we
657      * would risk marking already applied sampler states dirty again. */
658     active_sampler = device->rev_tex_unit_map[context->active_texture];
659
660     if (active_sampler != WINED3D_UNMAPPED_STAGE)
661         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
662     surface_bind(surface, context, srgb);
663 }
664
665 static void surface_force_reload(struct wined3d_surface *surface)
666 {
667     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
668 }
669
670 static void surface_release_client_storage(struct wined3d_surface *surface)
671 {
672     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
673     const struct wined3d_gl_info *gl_info = context->gl_info;
674
675     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
676     if (surface->texture_name)
677     {
678         surface_bind_and_dirtify(surface, context, FALSE);
679         gl_info->gl_ops.gl.p_glTexImage2D(surface->texture_target, surface->texture_level,
680                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
681     }
682     if (surface->texture_name_srgb)
683     {
684         surface_bind_and_dirtify(surface, context, TRUE);
685         gl_info->gl_ops.gl.p_glTexImage2D(surface->texture_target, surface->texture_level,
686                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
687     }
688     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
689
690     context_release(context);
691
692     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
693     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
694     surface_force_reload(surface);
695 }
696
697 static HRESULT surface_private_setup(struct wined3d_surface *surface)
698 {
699     /* TODO: Check against the maximum texture sizes supported by the video card. */
700     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
701     unsigned int pow2Width, pow2Height;
702
703     TRACE("surface %p.\n", surface);
704
705     surface->texture_name = 0;
706     surface->texture_target = GL_TEXTURE_2D;
707
708     /* Non-power2 support */
709     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
710     {
711         pow2Width = surface->resource.width;
712         pow2Height = surface->resource.height;
713     }
714     else
715     {
716         /* Find the nearest pow2 match */
717         pow2Width = pow2Height = 1;
718         while (pow2Width < surface->resource.width)
719             pow2Width <<= 1;
720         while (pow2Height < surface->resource.height)
721             pow2Height <<= 1;
722     }
723     surface->pow2Width = pow2Width;
724     surface->pow2Height = pow2Height;
725
726     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
727     {
728         /* TODO: Add support for non power two compressed textures. */
729         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
730         {
731             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
732                   surface, surface->resource.width, surface->resource.height);
733             return WINED3DERR_NOTAVAILABLE;
734         }
735     }
736
737     if (pow2Width != surface->resource.width
738             || pow2Height != surface->resource.height)
739     {
740         surface->flags |= SFLAG_NONPOW2;
741     }
742
743     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
744             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
745     {
746         /* One of three options:
747          * 1: Do the same as we do with NPOT and scale the texture, (any
748          *    texture ops would require the texture to be scaled which is
749          *    potentially slow)
750          * 2: Set the texture to the maximum size (bad idea).
751          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
752          * 4: Create the surface, but allow it to be used only for DirectDraw
753          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
754          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
755          *    the render target. */
756         if (surface->resource.pool == WINED3D_POOL_DEFAULT || surface->resource.pool == WINED3D_POOL_MANAGED)
757         {
758             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
759             return WINED3DERR_NOTAVAILABLE;
760         }
761
762         /* We should never use this surface in combination with OpenGL! */
763         TRACE("Creating an oversized surface: %ux%u.\n",
764                 surface->pow2Width, surface->pow2Height);
765     }
766     else
767     {
768         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
769          * and EXT_PALETTED_TEXTURE is used in combination with texture
770          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
771          * EXT_PALETTED_TEXTURE doesn't work in combination with
772          * ARB_TEXTURE_RECTANGLE. */
773         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
774                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
775                 && gl_info->supported[EXT_PALETTED_TEXTURE]
776                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
777         {
778             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
779             surface->pow2Width = surface->resource.width;
780             surface->pow2Height = surface->resource.height;
781             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
782         }
783     }
784
785     switch (wined3d_settings.offscreen_rendering_mode)
786     {
787         case ORM_FBO:
788             surface->get_drawable_size = get_drawable_size_fbo;
789             break;
790
791         case ORM_BACKBUFFER:
792             surface->get_drawable_size = get_drawable_size_backbuffer;
793             break;
794
795         default:
796             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
797             return WINED3DERR_INVALIDCALL;
798     }
799
800     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
801         surface->flags |= SFLAG_DISCARDED;
802
803     return WINED3D_OK;
804 }
805
806 static void surface_realize_palette(struct wined3d_surface *surface)
807 {
808     struct wined3d_palette *palette = surface->palette;
809
810     TRACE("surface %p.\n", surface);
811
812     if (!palette) return;
813
814     if (surface->resource.format->id == WINED3DFMT_P8_UINT
815             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
816     {
817         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
818         {
819             /* Make sure the texture is up to date. This call doesn't do
820              * anything if the texture is already up to date. */
821             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
822
823             /* We want to force a palette refresh, so mark the drawable as not being up to date */
824             if (!surface_is_offscreen(surface))
825                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
826         }
827         else
828         {
829             if (!(surface->flags & SFLAG_INSYSMEM))
830             {
831                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
832                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
833             }
834             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
835         }
836     }
837
838     if (surface->flags & SFLAG_DIBSECTION)
839     {
840         RGBQUAD col[256];
841         unsigned int i;
842
843         TRACE("Updating the DC's palette.\n");
844
845         for (i = 0; i < 256; ++i)
846         {
847             col[i].rgbRed   = palette->palents[i].peRed;
848             col[i].rgbGreen = palette->palents[i].peGreen;
849             col[i].rgbBlue  = palette->palents[i].peBlue;
850             col[i].rgbReserved = 0;
851         }
852         SetDIBColorTable(surface->hDC, 0, 256, col);
853     }
854
855     /* Propagate the changes to the drawable when we have a palette. */
856     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
857         surface_load_location(surface, surface->draw_binding, NULL);
858 }
859
860 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
861 {
862     HRESULT hr;
863
864     /* If there's no destination surface there is nothing to do. */
865     if (!surface->overlay_dest)
866         return WINED3D_OK;
867
868     /* Blt calls ModifyLocation on the dest surface, which in turn calls
869      * DrawOverlay to update the overlay. Prevent an endless recursion. */
870     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
871         return WINED3D_OK;
872
873     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
874     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
875             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3D_TEXF_LINEAR);
876     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
877
878     return hr;
879 }
880
881 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
882 {
883     struct wined3d_device *device = surface->resource.device;
884     const RECT *pass_rect = rect;
885
886     TRACE("surface %p, rect %s, flags %#x.\n",
887             surface, wine_dbgstr_rect(rect), flags);
888
889     if (flags & WINED3D_MAP_DISCARD)
890     {
891         TRACE("WINED3D_MAP_DISCARD flag passed, marking SYSMEM as up to date.\n");
892         surface_prepare_system_memory(surface);
893         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
894     }
895     else
896     {
897         if (surface->resource.usage & WINED3DUSAGE_DYNAMIC)
898             WARN_(d3d_perf)("Mapping a dynamic surface without WINED3D_MAP_DISCARD.\n");
899
900         /* surface_load_location() does not check if the rectangle specifies
901          * the full surface. Most callers don't need that, so do it here. */
902         if (rect && !rect->top && !rect->left
903                 && rect->right == surface->resource.width
904                 && rect->bottom == surface->resource.height)
905             pass_rect = NULL;
906         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
907     }
908
909     if (surface->flags & SFLAG_PBO)
910     {
911         const struct wined3d_gl_info *gl_info;
912         struct wined3d_context *context;
913
914         context = context_acquire(device, NULL);
915         gl_info = context->gl_info;
916
917         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
918         checkGLcall("glBindBufferARB");
919
920         /* This shouldn't happen but could occur if some other function
921          * didn't handle the PBO properly. */
922         if (surface->resource.allocatedMemory)
923             ERR("The surface already has PBO memory allocated.\n");
924
925         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
926         checkGLcall("glMapBufferARB");
927
928         /* Make sure the PBO isn't set anymore in order not to break non-PBO
929          * calls. */
930         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
931         checkGLcall("glBindBufferARB");
932
933         context_release(context);
934     }
935
936     if (!(flags & (WINED3D_MAP_NO_DIRTY_UPDATE | WINED3D_MAP_READONLY)))
937     {
938         if (!rect)
939             surface_add_dirty_rect(surface, NULL);
940         else
941         {
942             struct wined3d_box b;
943
944             b.left = rect->left;
945             b.top = rect->top;
946             b.right = rect->right;
947             b.bottom = rect->bottom;
948             b.front = 0;
949             b.back = 1;
950             surface_add_dirty_rect(surface, &b);
951         }
952     }
953 }
954
955 static void surface_unmap(struct wined3d_surface *surface)
956 {
957     struct wined3d_device *device = surface->resource.device;
958     BOOL fullsurface;
959
960     TRACE("surface %p.\n", surface);
961
962     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
963
964     if (surface->flags & SFLAG_PBO)
965     {
966         const struct wined3d_gl_info *gl_info;
967         struct wined3d_context *context;
968
969         TRACE("Freeing PBO memory.\n");
970
971         context = context_acquire(device, NULL);
972         gl_info = context->gl_info;
973
974         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
975         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
976         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
977         checkGLcall("glUnmapBufferARB");
978         context_release(context);
979
980         surface->resource.allocatedMemory = NULL;
981     }
982
983     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
984
985     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
986     {
987         TRACE("Not dirtified, nothing to do.\n");
988         goto done;
989     }
990
991     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
992             && surface->container.u.swapchain->front_buffer == surface)
993     {
994         if (!surface->dirtyRect.left && !surface->dirtyRect.top
995                 && surface->dirtyRect.right == surface->resource.width
996                 && surface->dirtyRect.bottom == surface->resource.height)
997         {
998             fullsurface = TRUE;
999         }
1000         else
1001         {
1002             /* TODO: Proper partial rectangle tracking. */
1003             fullsurface = FALSE;
1004             surface->flags |= SFLAG_INSYSMEM;
1005         }
1006
1007         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
1008
1009         /* Partial rectangle tracking is not commonly implemented, it is only
1010          * done for render targets. INSYSMEM was set before to tell
1011          * surface_load_location() where to read the rectangle from.
1012          * Indrawable is set because all modifications from the partial
1013          * sysmem copy are written back to the drawable, thus the surface is
1014          * merged again in the drawable. The sysmem copy is not fully up to
1015          * date because only a subrectangle was read in Map(). */
1016         if (!fullsurface)
1017         {
1018             surface_modify_location(surface, surface->draw_binding, TRUE);
1019             surface_evict_sysmem(surface);
1020         }
1021
1022         surface->dirtyRect.left = surface->resource.width;
1023         surface->dirtyRect.top = surface->resource.height;
1024         surface->dirtyRect.right = 0;
1025         surface->dirtyRect.bottom = 0;
1026     }
1027     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1028     {
1029         FIXME("Depth / stencil buffer locking is not implemented.\n");
1030     }
1031
1032 done:
1033     /* Overlays have to be redrawn manually after changes with the GL implementation */
1034     if (surface->overlay_dest)
1035         surface_draw_overlay(surface);
1036 }
1037
1038 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1039 {
1040     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1041         return FALSE;
1042     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1043         return FALSE;
1044     return TRUE;
1045 }
1046
1047 static void surface_depth_blt_fbo(const struct wined3d_device *device,
1048         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect,
1049         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect)
1050 {
1051     const struct wined3d_gl_info *gl_info;
1052     struct wined3d_context *context;
1053     DWORD src_mask, dst_mask;
1054     GLbitfield gl_mask;
1055
1056     TRACE("device %p\n", device);
1057     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1058             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect));
1059     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1060             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect));
1061
1062     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1063     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1064
1065     if (src_mask != dst_mask)
1066     {
1067         ERR("Incompatible formats %s and %s.\n",
1068                 debug_d3dformat(src_surface->resource.format->id),
1069                 debug_d3dformat(dst_surface->resource.format->id));
1070         return;
1071     }
1072
1073     if (!src_mask)
1074     {
1075         ERR("Not a depth / stencil format: %s.\n",
1076                 debug_d3dformat(src_surface->resource.format->id));
1077         return;
1078     }
1079
1080     gl_mask = 0;
1081     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1082         gl_mask |= GL_DEPTH_BUFFER_BIT;
1083     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1084         gl_mask |= GL_STENCIL_BUFFER_BIT;
1085
1086     /* Make sure the locations are up-to-date. Loading the destination
1087      * surface isn't required if the entire surface is overwritten. */
1088     surface_load_location(src_surface, src_location, NULL);
1089     if (!surface_is_full_rect(dst_surface, dst_rect))
1090         surface_load_location(dst_surface, dst_location, NULL);
1091
1092     context = context_acquire(device, NULL);
1093     if (!context->valid)
1094     {
1095         context_release(context);
1096         WARN("Invalid context, skipping blit.\n");
1097         return;
1098     }
1099
1100     gl_info = context->gl_info;
1101
1102     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, src_location);
1103     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1104
1105     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, dst_location);
1106     context_set_draw_buffer(context, GL_NONE);
1107     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1108     context_invalidate_state(context, STATE_FRAMEBUFFER);
1109
1110     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1111     {
1112         gl_info->gl_ops.gl.p_glDepthMask(GL_TRUE);
1113         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1114     }
1115     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1116     {
1117         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1118         {
1119             gl_info->gl_ops.gl.p_glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1120             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1121         }
1122         gl_info->gl_ops.gl.p_glStencilMask(~0U);
1123         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1124     }
1125
1126     gl_info->gl_ops.gl.p_glDisable(GL_SCISSOR_TEST);
1127     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1128
1129     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1130             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1131     checkGLcall("glBlitFramebuffer()");
1132
1133     if (wined3d_settings.strict_draw_ordering)
1134         gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
1135
1136     context_release(context);
1137 }
1138
1139 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1140  * Depth / stencil is not supported. */
1141 static void surface_blt_fbo(const struct wined3d_device *device, enum wined3d_texture_filter_type filter,
1142         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1143         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1144 {
1145     const struct wined3d_gl_info *gl_info;
1146     struct wined3d_context *context;
1147     RECT src_rect, dst_rect;
1148     GLenum gl_filter;
1149     GLenum buffer;
1150
1151     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1152     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1153             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1154     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1155             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1156
1157     src_rect = *src_rect_in;
1158     dst_rect = *dst_rect_in;
1159
1160     switch (filter)
1161     {
1162         case WINED3D_TEXF_LINEAR:
1163             gl_filter = GL_LINEAR;
1164             break;
1165
1166         default:
1167             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1168         case WINED3D_TEXF_NONE:
1169         case WINED3D_TEXF_POINT:
1170             gl_filter = GL_NEAREST;
1171             break;
1172     }
1173
1174     /* Resolve the source surface first if needed. */
1175     if (src_location == SFLAG_INRB_MULTISAMPLE
1176             && (src_surface->resource.format->id != dst_surface->resource.format->id
1177                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1178                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1179         src_location = SFLAG_INRB_RESOLVED;
1180
1181     /* Make sure the locations are up-to-date. Loading the destination
1182      * surface isn't required if the entire surface is overwritten. (And is
1183      * in fact harmful if we're being called by surface_load_location() with
1184      * the purpose of loading the destination surface.) */
1185     surface_load_location(src_surface, src_location, NULL);
1186     if (!surface_is_full_rect(dst_surface, &dst_rect))
1187         surface_load_location(dst_surface, dst_location, NULL);
1188
1189     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1190     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1191     else context = context_acquire(device, NULL);
1192
1193     if (!context->valid)
1194     {
1195         context_release(context);
1196         WARN("Invalid context, skipping blit.\n");
1197         return;
1198     }
1199
1200     gl_info = context->gl_info;
1201
1202     if (src_location == SFLAG_INDRAWABLE)
1203     {
1204         TRACE("Source surface %p is onscreen.\n", src_surface);
1205         buffer = surface_get_gl_buffer(src_surface);
1206         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1207     }
1208     else
1209     {
1210         TRACE("Source surface %p is offscreen.\n", src_surface);
1211         buffer = GL_COLOR_ATTACHMENT0;
1212     }
1213
1214     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1215     gl_info->gl_ops.gl.p_glReadBuffer(buffer);
1216     checkGLcall("glReadBuffer()");
1217     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1218
1219     if (dst_location == SFLAG_INDRAWABLE)
1220     {
1221         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1222         buffer = surface_get_gl_buffer(dst_surface);
1223         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1224     }
1225     else
1226     {
1227         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1228         buffer = GL_COLOR_ATTACHMENT0;
1229     }
1230
1231     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1232     context_set_draw_buffer(context, buffer);
1233     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1234     context_invalidate_state(context, STATE_FRAMEBUFFER);
1235
1236     gl_info->gl_ops.gl.p_glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1237     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1238     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1239     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1240     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1241
1242     gl_info->gl_ops.gl.p_glDisable(GL_SCISSOR_TEST);
1243     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1244
1245     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1246             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1247     checkGLcall("glBlitFramebuffer()");
1248
1249     if (wined3d_settings.strict_draw_ordering
1250             || (dst_location == SFLAG_INDRAWABLE
1251             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1252         gl_info->gl_ops.gl.p_glFlush();
1253
1254     context_release(context);
1255 }
1256
1257 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1258         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
1259         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
1260 {
1261     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1262         return FALSE;
1263
1264     /* Source and/or destination need to be on the GL side */
1265     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
1266         return FALSE;
1267
1268     switch (blit_op)
1269     {
1270         case WINED3D_BLIT_OP_COLOR_BLIT:
1271             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1272                 return FALSE;
1273             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1274                 return FALSE;
1275             break;
1276
1277         case WINED3D_BLIT_OP_DEPTH_BLIT:
1278             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1279                 return FALSE;
1280             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1281                 return FALSE;
1282             break;
1283
1284         default:
1285             return FALSE;
1286     }
1287
1288     if (!(src_format->id == dst_format->id
1289             || (is_identity_fixup(src_format->color_fixup)
1290             && is_identity_fixup(dst_format->color_fixup))))
1291         return FALSE;
1292
1293     return TRUE;
1294 }
1295
1296 /* This function checks if the primary render target uses the 8bit paletted format. */
1297 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1298 {
1299     if (device->fb.render_targets && device->fb.render_targets[0])
1300     {
1301         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1302         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1303                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1304             return TRUE;
1305     }
1306     return FALSE;
1307 }
1308
1309 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1310         DWORD color, struct wined3d_color *float_color)
1311 {
1312     const struct wined3d_format *format = surface->resource.format;
1313     const struct wined3d_device *device = surface->resource.device;
1314
1315     switch (format->id)
1316     {
1317         case WINED3DFMT_P8_UINT:
1318             if (surface->palette)
1319             {
1320                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1321                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1322                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1323             }
1324             else
1325             {
1326                 float_color->r = 0.0f;
1327                 float_color->g = 0.0f;
1328                 float_color->b = 0.0f;
1329             }
1330             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1331             break;
1332
1333         case WINED3DFMT_B5G6R5_UNORM:
1334             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1335             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1336             float_color->b = (color & 0x1f) / 31.0f;
1337             float_color->a = 1.0f;
1338             break;
1339
1340         case WINED3DFMT_B8G8R8_UNORM:
1341         case WINED3DFMT_B8G8R8X8_UNORM:
1342             float_color->r = D3DCOLOR_R(color);
1343             float_color->g = D3DCOLOR_G(color);
1344             float_color->b = D3DCOLOR_B(color);
1345             float_color->a = 1.0f;
1346             break;
1347
1348         case WINED3DFMT_B8G8R8A8_UNORM:
1349             float_color->r = D3DCOLOR_R(color);
1350             float_color->g = D3DCOLOR_G(color);
1351             float_color->b = D3DCOLOR_B(color);
1352             float_color->a = D3DCOLOR_A(color);
1353             break;
1354
1355         default:
1356             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1357             return FALSE;
1358     }
1359
1360     return TRUE;
1361 }
1362
1363 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1364 {
1365     const struct wined3d_format *format = surface->resource.format;
1366
1367     switch (format->id)
1368     {
1369         case WINED3DFMT_S1_UINT_D15_UNORM:
1370             *float_depth = depth / (float)0x00007fff;
1371             break;
1372
1373         case WINED3DFMT_D16_UNORM:
1374             *float_depth = depth / (float)0x0000ffff;
1375             break;
1376
1377         case WINED3DFMT_D24_UNORM_S8_UINT:
1378         case WINED3DFMT_X8D24_UNORM:
1379             *float_depth = depth / (float)0x00ffffff;
1380             break;
1381
1382         case WINED3DFMT_D32_UNORM:
1383             *float_depth = depth / (float)0xffffffff;
1384             break;
1385
1386         default:
1387             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1388             return FALSE;
1389     }
1390
1391     return TRUE;
1392 }
1393
1394 /* Do not call while under the GL lock. */
1395 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1396 {
1397     const struct wined3d_resource *resource = &surface->resource;
1398     struct wined3d_device *device = resource->device;
1399     const struct blit_shader *blitter;
1400
1401     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1402             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1403     if (!blitter)
1404     {
1405         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1406         return WINED3DERR_INVALIDCALL;
1407     }
1408
1409     return blitter->depth_fill(device, surface, rect, depth);
1410 }
1411
1412 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect,
1413         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect)
1414 {
1415     struct wined3d_device *device = src_surface->resource.device;
1416
1417     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1418             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1419             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1420         return WINED3DERR_INVALIDCALL;
1421
1422     surface_depth_blt_fbo(device, src_surface, src_location, src_rect, dst_surface, dst_location, dst_rect);
1423
1424     surface_modify_ds_location(dst_surface, dst_location,
1425             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1426
1427     return WINED3D_OK;
1428 }
1429
1430 /* Do not call while under the GL lock. */
1431 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1432         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1433         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
1434 {
1435     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1436     struct wined3d_device *device = dst_surface->resource.device;
1437     DWORD src_ds_flags, dst_ds_flags;
1438     RECT src_rect, dst_rect;
1439     BOOL scale, convert;
1440
1441     static const DWORD simple_blit = WINEDDBLT_ASYNC
1442             | WINEDDBLT_COLORFILL
1443             | WINEDDBLT_WAIT
1444             | WINEDDBLT_DEPTHFILL
1445             | WINEDDBLT_DONOTWAIT;
1446
1447     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1448             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1449             flags, fx, debug_d3dtexturefiltertype(filter));
1450     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1451
1452     if (fx)
1453     {
1454         TRACE("dwSize %#x.\n", fx->dwSize);
1455         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1456         TRACE("dwROP %#x.\n", fx->dwROP);
1457         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1458         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1459         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1460         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1461         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1462         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1463         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1464         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1465         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1466         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1467         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1468         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1469         TRACE("dwReserved %#x.\n", fx->dwReserved);
1470         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1471         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1472         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1473         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1474         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1475         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1476                 fx->ddckDestColorkey.color_space_low_value,
1477                 fx->ddckDestColorkey.color_space_high_value);
1478         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1479                 fx->ddckSrcColorkey.color_space_low_value,
1480                 fx->ddckSrcColorkey.color_space_high_value);
1481     }
1482
1483     if (dst_surface->resource.map_count || (src_surface && src_surface->resource.map_count))
1484     {
1485         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1486         return WINEDDERR_SURFACEBUSY;
1487     }
1488
1489     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1490
1491     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1492             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1493             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1494             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1495             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1496     {
1497         WARN("The application gave us a bad destination rectangle.\n");
1498         return WINEDDERR_INVALIDRECT;
1499     }
1500
1501     if (src_surface)
1502     {
1503         surface_get_rect(src_surface, src_rect_in, &src_rect);
1504
1505         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1506                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1507                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1508                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1509                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1510         {
1511             WARN("Application gave us bad source rectangle for Blt.\n");
1512             return WINEDDERR_INVALIDRECT;
1513         }
1514     }
1515     else
1516     {
1517         memset(&src_rect, 0, sizeof(src_rect));
1518     }
1519
1520     if (!fx || !(fx->dwDDFX))
1521         flags &= ~WINEDDBLT_DDFX;
1522
1523     if (flags & WINEDDBLT_WAIT)
1524         flags &= ~WINEDDBLT_WAIT;
1525
1526     if (flags & WINEDDBLT_ASYNC)
1527     {
1528         static unsigned int once;
1529
1530         if (!once++)
1531             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1532         flags &= ~WINEDDBLT_ASYNC;
1533     }
1534
1535     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1536     if (flags & WINEDDBLT_DONOTWAIT)
1537     {
1538         static unsigned int once;
1539
1540         if (!once++)
1541             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1542         flags &= ~WINEDDBLT_DONOTWAIT;
1543     }
1544
1545     if (!device->d3d_initialized)
1546     {
1547         WARN("D3D not initialized, using fallback.\n");
1548         goto cpu;
1549     }
1550
1551     /* We want to avoid invalidating the sysmem location for converted
1552      * surfaces, since otherwise we'd have to convert the data back when
1553      * locking them. */
1554     if (dst_surface->flags & SFLAG_CONVERTED)
1555     {
1556         WARN_(d3d_perf)("Converted surface, using CPU blit.\n");
1557         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1558     }
1559
1560     if (flags & ~simple_blit)
1561     {
1562         WARN_(d3d_perf)("Using fallback for complex blit (%#x).\n", flags);
1563         goto fallback;
1564     }
1565
1566     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1567         src_swapchain = src_surface->container.u.swapchain;
1568     else
1569         src_swapchain = NULL;
1570
1571     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1572         dst_swapchain = dst_surface->container.u.swapchain;
1573     else
1574         dst_swapchain = NULL;
1575
1576     /* This isn't strictly needed. FBO blits for example could deal with
1577      * cross-swapchain blits by first downloading the source to a texture
1578      * before switching to the destination context. We just have this here to
1579      * not have to deal with the issue, since cross-swapchain blits should be
1580      * rare. */
1581     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1582     {
1583         FIXME("Using fallback for cross-swapchain blit.\n");
1584         goto fallback;
1585     }
1586
1587     scale = src_surface
1588             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1589             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1590     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1591
1592     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1593     if (src_surface)
1594         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1595     else
1596         src_ds_flags = 0;
1597
1598     if (src_ds_flags || dst_ds_flags)
1599     {
1600         if (flags & WINEDDBLT_DEPTHFILL)
1601         {
1602             float depth;
1603
1604             TRACE("Depth fill.\n");
1605
1606             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1607                 return WINED3DERR_INVALIDCALL;
1608
1609             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1610                 return WINED3D_OK;
1611         }
1612         else
1613         {
1614             if (src_ds_flags != dst_ds_flags)
1615             {
1616                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1617                 return WINED3DERR_INVALIDCALL;
1618             }
1619
1620             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, src_surface->draw_binding, &src_rect,
1621                     dst_surface, dst_surface->draw_binding, &dst_rect)))
1622                 return WINED3D_OK;
1623         }
1624     }
1625     else
1626     {
1627         /* In principle this would apply to depth blits as well, but we don't
1628          * implement those in the CPU blitter at the moment. */
1629         if ((dst_surface->flags & SFLAG_INSYSMEM)
1630                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1631         {
1632             if (scale)
1633                 TRACE("Not doing sysmem blit because of scaling.\n");
1634             else if (convert)
1635                 TRACE("Not doing sysmem blit because of format conversion.\n");
1636             else
1637                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1638         }
1639
1640         if (flags & WINEDDBLT_COLORFILL)
1641         {
1642             struct wined3d_color color;
1643
1644             TRACE("Color fill.\n");
1645
1646             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1647                 goto fallback;
1648
1649             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1650                 return WINED3D_OK;
1651         }
1652         else
1653         {
1654             TRACE("Color blit.\n");
1655
1656             /* Upload */
1657             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1658             {
1659                 if (scale)
1660                     TRACE("Not doing upload because of scaling.\n");
1661                 else if (convert)
1662                     TRACE("Not doing upload because of format conversion.\n");
1663                 else
1664                 {
1665                     POINT dst_point = {dst_rect.left, dst_rect.top};
1666
1667                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1668                     {
1669                         if (!surface_is_offscreen(dst_surface))
1670                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1671                         return WINED3D_OK;
1672                     }
1673                 }
1674             }
1675
1676             /* Use present for back -> front blits. The idea behind this is
1677              * that present is potentially faster than a blit, in particular
1678              * when FBO blits aren't available. Some ddraw applications like
1679              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1680              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1681              * applications can't blit directly to the frontbuffer. */
1682             if (dst_swapchain && dst_swapchain->back_buffers
1683                     && dst_surface == dst_swapchain->front_buffer
1684                     && src_surface == dst_swapchain->back_buffers[0])
1685             {
1686                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1687
1688                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1689
1690                 /* Set the swap effect to COPY, we don't want the backbuffer
1691                  * to become undefined. */
1692                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1693                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1694                 dst_swapchain->desc.swap_effect = swap_effect;
1695
1696                 return WINED3D_OK;
1697             }
1698
1699             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1700                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1701                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1702             {
1703                 TRACE("Using FBO blit.\n");
1704
1705                 surface_blt_fbo(device, filter,
1706                         src_surface, src_surface->draw_binding, &src_rect,
1707                         dst_surface, dst_surface->draw_binding, &dst_rect);
1708                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1709                 return WINED3D_OK;
1710             }
1711
1712             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1713                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1714                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1715             {
1716                 TRACE("Using arbfp blit.\n");
1717
1718                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1719                     return WINED3D_OK;
1720             }
1721         }
1722     }
1723
1724 fallback:
1725
1726     /* Special cases for render targets. */
1727     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1728             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1729     {
1730         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1731                 src_surface, &src_rect, flags, fx, filter)))
1732             return WINED3D_OK;
1733     }
1734
1735 cpu:
1736
1737     /* For the rest call the X11 surface implementation. For render targets
1738      * this should be implemented OpenGL accelerated in BltOverride, other
1739      * blits are rather rare. */
1740     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1741 }
1742
1743 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1744         struct wined3d_surface *render_target)
1745 {
1746     TRACE("surface %p, render_target %p.\n", surface, render_target);
1747
1748     /* TODO: Check surface sizes, pools, etc. */
1749
1750     if (render_target->resource.multisample_type)
1751         return WINED3DERR_INVALIDCALL;
1752
1753     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3D_TEXF_POINT);
1754 }
1755
1756 /* Context activation is done by the caller. */
1757 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1758 {
1759     if (surface->flags & SFLAG_DIBSECTION)
1760     {
1761         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1762     }
1763     else
1764     {
1765         if (!surface->resource.heapMemory)
1766             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1767         else if (!(surface->flags & SFLAG_CLIENT))
1768             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1769                     surface, surface->resource.heapMemory, surface->flags);
1770
1771         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1772                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1773     }
1774
1775     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1776     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1777     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1778             surface->resource.size, surface->resource.allocatedMemory));
1779     checkGLcall("glGetBufferSubDataARB");
1780     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1781     checkGLcall("glDeleteBuffersARB");
1782
1783     surface->pbo = 0;
1784     surface->flags &= ~SFLAG_PBO;
1785 }
1786
1787 static BOOL surface_init_sysmem(struct wined3d_surface *surface)
1788 {
1789     if (!surface->resource.allocatedMemory)
1790     {
1791         if (!surface->resource.heapMemory)
1792         {
1793             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
1794                     surface->resource.size + RESOURCE_ALIGNMENT)))
1795             {
1796                 ERR("Failed to allocate memory.\n");
1797                 return FALSE;
1798             }
1799         }
1800         else if (!(surface->flags & SFLAG_CLIENT))
1801         {
1802             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1803                     surface, surface->resource.heapMemory, surface->flags);
1804         }
1805
1806         surface->resource.allocatedMemory =
1807             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1808     }
1809     else
1810     {
1811         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
1812     }
1813
1814     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1815
1816     return TRUE;
1817 }
1818
1819 /* Do not call while under the GL lock. */
1820 static void surface_unload(struct wined3d_resource *resource)
1821 {
1822     struct wined3d_surface *surface = surface_from_resource(resource);
1823     struct wined3d_renderbuffer_entry *entry, *entry2;
1824     struct wined3d_device *device = resource->device;
1825     const struct wined3d_gl_info *gl_info;
1826     struct wined3d_context *context;
1827
1828     TRACE("surface %p.\n", surface);
1829
1830     if (resource->pool == WINED3D_POOL_DEFAULT)
1831     {
1832         /* Default pool resources are supposed to be destroyed before Reset is called.
1833          * Implicit resources stay however. So this means we have an implicit render target
1834          * or depth stencil. The content may be destroyed, but we still have to tear down
1835          * opengl resources, so we cannot leave early.
1836          *
1837          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1838          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1839          * or the depth stencil into an FBO the texture or render buffer will be removed
1840          * and all flags get lost
1841          */
1842         if (!(surface->flags & SFLAG_PBO))
1843             surface_init_sysmem(surface);
1844         /* We also get here when the ddraw swapchain is destroyed, for example
1845          * for a mode switch. In this case this surface won't necessarily be
1846          * an implicit surface. We have to mark it lost so that the
1847          * application can restore it after the mode switch. */
1848         surface->flags |= SFLAG_LOST;
1849     }
1850     else
1851     {
1852         /* Load the surface into system memory */
1853         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1854         surface_modify_location(surface, surface->draw_binding, FALSE);
1855     }
1856     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1857     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1858     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1859
1860     context = context_acquire(device, NULL);
1861     gl_info = context->gl_info;
1862
1863     /* Destroy PBOs, but load them into real sysmem before */
1864     if (surface->flags & SFLAG_PBO)
1865         surface_remove_pbo(surface, gl_info);
1866
1867     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1868      * all application-created targets the application has to release the surface
1869      * before calling _Reset
1870      */
1871     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1872     {
1873         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1874         list_remove(&entry->entry);
1875         HeapFree(GetProcessHeap(), 0, entry);
1876     }
1877     list_init(&surface->renderbuffers);
1878     surface->current_renderbuffer = NULL;
1879
1880     /* If we're in a texture, the texture name belongs to the texture.
1881      * Otherwise, destroy it. */
1882     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1883     {
1884         gl_info->gl_ops.gl.p_glDeleteTextures(1, &surface->texture_name);
1885         surface->texture_name = 0;
1886         gl_info->gl_ops.gl.p_glDeleteTextures(1, &surface->texture_name_srgb);
1887         surface->texture_name_srgb = 0;
1888     }
1889     if (surface->rb_multisample)
1890     {
1891         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1892         surface->rb_multisample = 0;
1893     }
1894     if (surface->rb_resolved)
1895     {
1896         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1897         surface->rb_resolved = 0;
1898     }
1899
1900     context_release(context);
1901
1902     resource_unload(resource);
1903 }
1904
1905 static const struct wined3d_resource_ops surface_resource_ops =
1906 {
1907     surface_unload,
1908 };
1909
1910 static const struct wined3d_surface_ops surface_ops =
1911 {
1912     surface_private_setup,
1913     surface_realize_palette,
1914     surface_map,
1915     surface_unmap,
1916 };
1917
1918 /*****************************************************************************
1919  * Initializes the GDI surface, aka creates the DIB section we render to
1920  * The DIB section creation is done by calling GetDC, which will create the
1921  * section and releasing the dc to allow the app to use it. The dib section
1922  * will stay until the surface is released
1923  *
1924  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1925  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1926  * avoid confusion in the shared surface code.
1927  *
1928  * Returns:
1929  *  WINED3D_OK on success
1930  *  The return values of called methods on failure
1931  *
1932  *****************************************************************************/
1933 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1934 {
1935     HRESULT hr;
1936
1937     TRACE("surface %p.\n", surface);
1938
1939     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1940     {
1941         ERR("Overlays not yet supported by GDI surfaces.\n");
1942         return WINED3DERR_INVALIDCALL;
1943     }
1944
1945     /* Sysmem textures have memory already allocated - release it,
1946      * this avoids an unnecessary memcpy. */
1947     hr = surface_create_dib_section(surface);
1948     if (SUCCEEDED(hr))
1949     {
1950         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1951         surface->resource.heapMemory = NULL;
1952         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1953     }
1954
1955     /* We don't mind the nonpow2 stuff in GDI. */
1956     surface->pow2Width = surface->resource.width;
1957     surface->pow2Height = surface->resource.height;
1958
1959     return WINED3D_OK;
1960 }
1961
1962 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1963 {
1964     struct wined3d_palette *palette = surface->palette;
1965
1966     TRACE("surface %p.\n", surface);
1967
1968     if (!palette) return;
1969
1970     if (surface->flags & SFLAG_DIBSECTION)
1971     {
1972         RGBQUAD col[256];
1973         unsigned int i;
1974
1975         TRACE("Updating the DC's palette.\n");
1976
1977         for (i = 0; i < 256; ++i)
1978         {
1979             col[i].rgbRed = palette->palents[i].peRed;
1980             col[i].rgbGreen = palette->palents[i].peGreen;
1981             col[i].rgbBlue = palette->palents[i].peBlue;
1982             col[i].rgbReserved = 0;
1983         }
1984         SetDIBColorTable(surface->hDC, 0, 256, col);
1985     }
1986
1987     /* Update the image because of the palette change. Some games like e.g.
1988      * Red Alert call SetEntries a lot to implement fading. */
1989     /* Tell the swapchain to update the screen. */
1990     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1991     {
1992         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1993         if (surface == swapchain->front_buffer)
1994         {
1995             x11_copy_to_screen(swapchain, NULL);
1996         }
1997     }
1998 }
1999
2000 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2001 {
2002     TRACE("surface %p, rect %s, flags %#x.\n",
2003             surface, wine_dbgstr_rect(rect), flags);
2004
2005     if (!(surface->flags & SFLAG_DIBSECTION))
2006     {
2007         HRESULT hr;
2008
2009         /* This happens on gdi surfaces if the application set a user pointer
2010          * and resets it. Recreate the DIB section. */
2011         if (FAILED(hr = surface_create_dib_section(surface)))
2012         {
2013             ERR("Failed to create dib section, hr %#x.\n", hr);
2014             return;
2015         }
2016         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
2017         surface->resource.heapMemory = NULL;
2018         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2019     }
2020 }
2021
2022 static void gdi_surface_unmap(struct wined3d_surface *surface)
2023 {
2024     TRACE("surface %p.\n", surface);
2025
2026     /* Tell the swapchain to update the screen. */
2027     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2028     {
2029         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2030         if (surface == swapchain->front_buffer)
2031         {
2032             x11_copy_to_screen(swapchain, &surface->lockedRect);
2033         }
2034     }
2035
2036     memset(&surface->lockedRect, 0, sizeof(RECT));
2037 }
2038
2039 static const struct wined3d_surface_ops gdi_surface_ops =
2040 {
2041     gdi_surface_private_setup,
2042     gdi_surface_realize_palette,
2043     gdi_surface_map,
2044     gdi_surface_unmap,
2045 };
2046
2047 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2048 {
2049     GLuint *name;
2050     DWORD flag;
2051
2052     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2053
2054     if(srgb)
2055     {
2056         name = &surface->texture_name_srgb;
2057         flag = SFLAG_INSRGBTEX;
2058     }
2059     else
2060     {
2061         name = &surface->texture_name;
2062         flag = SFLAG_INTEXTURE;
2063     }
2064
2065     if (!*name && new_name)
2066     {
2067         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2068          * surface has no texture name yet. See if we can get rid of this. */
2069         if (surface->flags & flag)
2070         {
2071             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2072             surface_modify_location(surface, flag, FALSE);
2073         }
2074     }
2075
2076     *name = new_name;
2077     surface_force_reload(surface);
2078 }
2079
2080 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target, GLint level)
2081 {
2082     TRACE("surface %p, target %#x.\n", surface, target);
2083
2084     if (surface->texture_target != target)
2085     {
2086         if (target == GL_TEXTURE_RECTANGLE_ARB)
2087         {
2088             surface->flags &= ~SFLAG_NORMCOORD;
2089         }
2090         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2091         {
2092             surface->flags |= SFLAG_NORMCOORD;
2093         }
2094     }
2095     surface->texture_target = target;
2096     surface->texture_level = level;
2097     surface_force_reload(surface);
2098 }
2099
2100 /* This call just downloads data, the caller is responsible for binding the
2101  * correct texture. */
2102 /* Context activation is done by the caller. */
2103 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2104 {
2105     const struct wined3d_format *format = surface->resource.format;
2106
2107     /* Only support read back of converted P8 surfaces. */
2108     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2109     {
2110         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2111         return;
2112     }
2113
2114     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2115     {
2116         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2117                 surface, surface->texture_level, format->glFormat, format->glType,
2118                 surface->resource.allocatedMemory);
2119
2120         if (surface->flags & SFLAG_PBO)
2121         {
2122             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2123             checkGLcall("glBindBufferARB");
2124             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2125             checkGLcall("glGetCompressedTexImageARB");
2126             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2127             checkGLcall("glBindBufferARB");
2128         }
2129         else
2130         {
2131             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2132                     surface->texture_level, surface->resource.allocatedMemory));
2133             checkGLcall("glGetCompressedTexImageARB");
2134         }
2135     }
2136     else
2137     {
2138         void *mem;
2139         GLenum gl_format = format->glFormat;
2140         GLenum gl_type = format->glType;
2141         int src_pitch = 0;
2142         int dst_pitch = 0;
2143
2144         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2145         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2146         {
2147             gl_format = GL_ALPHA;
2148             gl_type = GL_UNSIGNED_BYTE;
2149         }
2150
2151         if (surface->flags & SFLAG_NONPOW2)
2152         {
2153             unsigned char alignment = surface->resource.device->surface_alignment;
2154             src_pitch = format->byte_count * surface->pow2Width;
2155             dst_pitch = wined3d_surface_get_pitch(surface);
2156             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2157             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2158         }
2159         else
2160         {
2161             mem = surface->resource.allocatedMemory;
2162         }
2163
2164         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2165                 surface, surface->texture_level, gl_format, gl_type, mem);
2166
2167         if (surface->flags & SFLAG_PBO)
2168         {
2169             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2170             checkGLcall("glBindBufferARB");
2171
2172             gl_info->gl_ops.gl.p_glGetTexImage(surface->texture_target, surface->texture_level,
2173                     gl_format, gl_type, NULL);
2174             checkGLcall("glGetTexImage");
2175
2176             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2177             checkGLcall("glBindBufferARB");
2178         }
2179         else
2180         {
2181             gl_info->gl_ops.gl.p_glGetTexImage(surface->texture_target, surface->texture_level,
2182                     gl_format, gl_type, mem);
2183             checkGLcall("glGetTexImage");
2184         }
2185
2186         if (surface->flags & SFLAG_NONPOW2)
2187         {
2188             const BYTE *src_data;
2189             BYTE *dst_data;
2190             UINT y;
2191             /*
2192              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2193              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2194              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2195              *
2196              * We're doing this...
2197              *
2198              * instead of boxing the texture :
2199              * |<-texture width ->|  -->pow2width|   /\
2200              * |111111111111111111|              |   |
2201              * |222 Texture 222222| boxed empty  | texture height
2202              * |3333 Data 33333333|              |   |
2203              * |444444444444444444|              |   \/
2204              * -----------------------------------   |
2205              * |     boxed  empty | boxed empty  | pow2height
2206              * |                  |              |   \/
2207              * -----------------------------------
2208              *
2209              *
2210              * we're repacking the data to the expected texture width
2211              *
2212              * |<-texture width ->|  -->pow2width|   /\
2213              * |111111111111111111222222222222222|   |
2214              * |222333333333333333333444444444444| texture height
2215              * |444444                           |   |
2216              * |                                 |   \/
2217              * |                                 |   |
2218              * |            empty                | pow2height
2219              * |                                 |   \/
2220              * -----------------------------------
2221              *
2222              * == is the same as
2223              *
2224              * |<-texture width ->|    /\
2225              * |111111111111111111|
2226              * |222222222222222222|texture height
2227              * |333333333333333333|
2228              * |444444444444444444|    \/
2229              * --------------------
2230              *
2231              * this also means that any references to allocatedMemory should work with the data as if were a
2232              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2233              *
2234              * internally the texture is still stored in a boxed format so any references to textureName will
2235              * get a boxed texture with width pow2width and not a texture of width resource.width.
2236              *
2237              * Performance should not be an issue, because applications normally do not lock the surfaces when
2238              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2239              * and doesn't have to be re-read. */
2240             src_data = mem;
2241             dst_data = surface->resource.allocatedMemory;
2242             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2243             for (y = 1; y < surface->resource.height; ++y)
2244             {
2245                 /* skip the first row */
2246                 src_data += src_pitch;
2247                 dst_data += dst_pitch;
2248                 memcpy(dst_data, src_data, dst_pitch);
2249             }
2250
2251             HeapFree(GetProcessHeap(), 0, mem);
2252         }
2253     }
2254
2255     /* Surface has now been downloaded */
2256     surface->flags |= SFLAG_INSYSMEM;
2257 }
2258
2259 /* This call just uploads data, the caller is responsible for binding the
2260  * correct texture. */
2261 /* Context activation is done by the caller. */
2262 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2263         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2264         BOOL srgb, const struct wined3d_bo_address *data)
2265 {
2266     UINT update_w = src_rect->right - src_rect->left;
2267     UINT update_h = src_rect->bottom - src_rect->top;
2268
2269     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2270             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2271             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2272
2273     if (surface->resource.map_count)
2274     {
2275         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2276         surface->flags |= SFLAG_PIN_SYSMEM;
2277     }
2278
2279     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2280     {
2281         update_h *= format->height_scale.numerator;
2282         update_h /= format->height_scale.denominator;
2283     }
2284
2285     if (data->buffer_object)
2286     {
2287         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2288         checkGLcall("glBindBufferARB");
2289     }
2290
2291     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2292     {
2293         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2294         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2295         const BYTE *addr = data->addr;
2296         GLenum internal;
2297
2298         addr += (src_rect->top / format->block_height) * src_pitch;
2299         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2300
2301         if (srgb)
2302             internal = format->glGammaInternal;
2303         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2304             internal = format->rtInternal;
2305         else
2306             internal = format->glInternal;
2307
2308         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2309                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2310                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2311
2312         if (row_length == src_pitch)
2313         {
2314             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2315                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2316         }
2317         else
2318         {
2319             UINT row, y;
2320
2321             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2322              * can't use the unpack row length like below. */
2323             for (row = 0, y = dst_point->y; row < row_count; ++row)
2324             {
2325                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2326                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2327                 y += format->block_height;
2328                 addr += src_pitch;
2329             }
2330         }
2331         checkGLcall("glCompressedTexSubImage2DARB");
2332     }
2333     else
2334     {
2335         const BYTE *addr = data->addr;
2336
2337         addr += src_rect->top * src_pitch;
2338         addr += src_rect->left * format->byte_count;
2339
2340         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2341                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2342                 update_w, update_h, format->glFormat, format->glType, addr);
2343
2344         gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2345         gl_info->gl_ops.gl.p_glTexSubImage2D(surface->texture_target, surface->texture_level,
2346                 dst_point->x, dst_point->y, update_w, update_h, format->glFormat, format->glType, addr);
2347         gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2348         checkGLcall("glTexSubImage2D");
2349     }
2350
2351     if (data->buffer_object)
2352     {
2353         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2354         checkGLcall("glBindBufferARB");
2355     }
2356
2357     if (wined3d_settings.strict_draw_ordering)
2358         gl_info->gl_ops.gl.p_glFlush();
2359
2360     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2361     {
2362         struct wined3d_device *device = surface->resource.device;
2363         unsigned int i;
2364
2365         for (i = 0; i < device->context_count; ++i)
2366         {
2367             context_surface_update(device->contexts[i], surface);
2368         }
2369     }
2370 }
2371
2372 static HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck, BOOL use_texturing,
2373         struct wined3d_format *format, enum wined3d_conversion_type *conversion_type)
2374 {
2375     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2376     const struct wined3d_device *device = surface->resource.device;
2377     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
2378     BOOL blit_supported = FALSE;
2379
2380     /* Copy the default values from the surface. Below we might perform fixups */
2381     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
2382     *format = *surface->resource.format;
2383     *conversion_type = WINED3D_CT_NONE;
2384
2385     /* Ok, now look if we have to do any conversion */
2386     switch (surface->resource.format->id)
2387     {
2388         case WINED3DFMT_P8_UINT:
2389             /* Below the call to blit_supported is disabled for Wine 1.2
2390              * because the function isn't operating correctly yet. At the
2391              * moment 8-bit blits are handled in software and if certain GL
2392              * extensions are around, surface conversion is performed at
2393              * upload time. The blit_supported call recognizes it as a
2394              * destination fixup. This type of upload 'fixup' and 8-bit to
2395              * 8-bit blits need to be handled by the blit_shader.
2396              * TODO: get rid of this #if 0. */
2397 #if 0
2398             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
2399                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
2400                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
2401 #endif
2402             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
2403
2404             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
2405              * texturing. Further also use conversion in case of color keying.
2406              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
2407              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
2408              * conflicts with this.
2409              */
2410             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
2411                     || colorkey_active || !use_texturing)
2412             {
2413                 format->glFormat = GL_RGBA;
2414                 format->glInternal = GL_RGBA;
2415                 format->glType = GL_UNSIGNED_BYTE;
2416                 format->conv_byte_count = 4;
2417                 if (colorkey_active)
2418                     *conversion_type = WINED3D_CT_PALETTED_CK;
2419                 else
2420                     *conversion_type = WINED3D_CT_PALETTED;
2421             }
2422             break;
2423
2424         case WINED3DFMT_B2G3R3_UNORM:
2425             /* **********************
2426                 GL_UNSIGNED_BYTE_3_3_2
2427                 ********************** */
2428             if (colorkey_active) {
2429                 /* This texture format will never be used.. So do not care about color keying
2430                     up until the point in time it will be needed :-) */
2431                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
2432             }
2433             break;
2434
2435         case WINED3DFMT_B5G6R5_UNORM:
2436             if (colorkey_active)
2437             {
2438                 *conversion_type = WINED3D_CT_CK_565;
2439                 format->glFormat = GL_RGBA;
2440                 format->glInternal = GL_RGB5_A1;
2441                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
2442                 format->conv_byte_count = 2;
2443             }
2444             break;
2445
2446         case WINED3DFMT_B5G5R5X1_UNORM:
2447             if (colorkey_active)
2448             {
2449                 *conversion_type = WINED3D_CT_CK_5551;
2450                 format->glFormat = GL_BGRA;
2451                 format->glInternal = GL_RGB5_A1;
2452                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
2453                 format->conv_byte_count = 2;
2454             }
2455             break;
2456
2457         case WINED3DFMT_B8G8R8_UNORM:
2458             if (colorkey_active)
2459             {
2460                 *conversion_type = WINED3D_CT_CK_RGB24;
2461                 format->glFormat = GL_RGBA;
2462                 format->glInternal = GL_RGBA8;
2463                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
2464                 format->conv_byte_count = 4;
2465             }
2466             break;
2467
2468         case WINED3DFMT_B8G8R8X8_UNORM:
2469             if (colorkey_active)
2470             {
2471                 *conversion_type = WINED3D_CT_RGB32_888;
2472                 format->glFormat = GL_RGBA;
2473                 format->glInternal = GL_RGBA8;
2474                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
2475                 format->conv_byte_count = 4;
2476             }
2477             break;
2478
2479         case WINED3DFMT_B8G8R8A8_UNORM:
2480             if (colorkey_active)
2481             {
2482                 *conversion_type = WINED3D_CT_CK_ARGB32;
2483                 format->conv_byte_count = 4;
2484             }
2485             break;
2486
2487         default:
2488             break;
2489     }
2490
2491     if (*conversion_type != WINED3D_CT_NONE)
2492     {
2493         format->rtInternal = format->glInternal;
2494         format->glGammaInternal = format->glInternal;
2495     }
2496
2497     return WINED3D_OK;
2498 }
2499
2500 static BOOL surface_check_block_align(struct wined3d_surface *surface, const RECT *rect)
2501 {
2502     UINT width_mask, height_mask;
2503
2504     if (!rect->left && !rect->top
2505             && rect->right == surface->resource.width
2506             && rect->bottom == surface->resource.height)
2507         return TRUE;
2508
2509     /* This assumes power of two block sizes, but NPOT block sizes would be
2510      * silly anyway. */
2511     width_mask = surface->resource.format->block_width - 1;
2512     height_mask = surface->resource.format->block_height - 1;
2513
2514     if (!(rect->left & width_mask) && !(rect->top & height_mask)
2515             && !(rect->right & width_mask) && !(rect->bottom & height_mask))
2516         return TRUE;
2517
2518     return FALSE;
2519 }
2520
2521 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2522         struct wined3d_surface *src_surface, const RECT *src_rect)
2523 {
2524     const struct wined3d_format *src_format;
2525     const struct wined3d_format *dst_format;
2526     const struct wined3d_gl_info *gl_info;
2527     enum wined3d_conversion_type convert;
2528     struct wined3d_context *context;
2529     struct wined3d_bo_address data;
2530     struct wined3d_format format;
2531     UINT update_w, update_h;
2532     UINT dst_w, dst_h;
2533     RECT r, dst_rect;
2534     UINT src_pitch;
2535     POINT p;
2536
2537     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2538             dst_surface, wine_dbgstr_point(dst_point),
2539             src_surface, wine_dbgstr_rect(src_rect));
2540
2541     src_format = src_surface->resource.format;
2542     dst_format = dst_surface->resource.format;
2543
2544     if (src_format->id != dst_format->id)
2545     {
2546         WARN("Source and destination surfaces should have the same format.\n");
2547         return WINED3DERR_INVALIDCALL;
2548     }
2549
2550     if (!dst_point)
2551     {
2552         p.x = 0;
2553         p.y = 0;
2554         dst_point = &p;
2555     }
2556     else if (dst_point->x < 0 || dst_point->y < 0)
2557     {
2558         WARN("Invalid destination point.\n");
2559         return WINED3DERR_INVALIDCALL;
2560     }
2561
2562     if (!src_rect)
2563     {
2564         r.left = 0;
2565         r.top = 0;
2566         r.right = src_surface->resource.width;
2567         r.bottom = src_surface->resource.height;
2568         src_rect = &r;
2569     }
2570     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2571             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2572     {
2573         WARN("Invalid source rectangle.\n");
2574         return WINED3DERR_INVALIDCALL;
2575     }
2576
2577     dst_w = dst_surface->resource.width;
2578     dst_h = dst_surface->resource.height;
2579
2580     update_w = src_rect->right - src_rect->left;
2581     update_h = src_rect->bottom - src_rect->top;
2582
2583     if (update_w > dst_w || dst_point->x > dst_w - update_w
2584             || update_h > dst_h || dst_point->y > dst_h - update_h)
2585     {
2586         WARN("Destination out of bounds.\n");
2587         return WINED3DERR_INVALIDCALL;
2588     }
2589
2590     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS) && !surface_check_block_align(src_surface, src_rect))
2591     {
2592         WARN("Source rectangle not block-aligned.\n");
2593         return WINED3DERR_INVALIDCALL;
2594     }
2595
2596     SetRect(&dst_rect, dst_point->x, dst_point->y, dst_point->x + update_w, dst_point->y + update_h);
2597     if ((dst_format->flags & WINED3DFMT_FLAG_BLOCKS) && !surface_check_block_align(dst_surface, &dst_rect))
2598     {
2599         WARN("Destination rectangle not block-aligned.\n");
2600         return WINED3DERR_INVALIDCALL;
2601     }
2602
2603     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2604     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2605     if (convert != WINED3D_CT_NONE || format.convert)
2606         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3D_TEXF_POINT);
2607
2608     context = context_acquire(dst_surface->resource.device, NULL);
2609     gl_info = context->gl_info;
2610
2611     /* Only load the surface for partial updates. For newly allocated texture
2612      * the texture wouldn't be the current location, and we'd upload zeroes
2613      * just to overwrite them again. */
2614     if (update_w == dst_w && update_h == dst_h)
2615         surface_prepare_texture(dst_surface, context, FALSE);
2616     else
2617         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2618     surface_bind(dst_surface, context, FALSE);
2619
2620     data.buffer_object = src_surface->pbo;
2621     data.addr = src_surface->resource.allocatedMemory;
2622     src_pitch = wined3d_surface_get_pitch(src_surface);
2623
2624     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2625
2626     invalidate_active_texture(dst_surface->resource.device, context);
2627
2628     context_release(context);
2629
2630     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2631     return WINED3D_OK;
2632 }
2633
2634 /* This call just allocates the texture, the caller is responsible for binding
2635  * the correct texture. */
2636 /* Context activation is done by the caller. */
2637 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2638         const struct wined3d_format *format, BOOL srgb)
2639 {
2640     BOOL enable_client_storage = FALSE;
2641     GLsizei width = surface->pow2Width;
2642     GLsizei height = surface->pow2Height;
2643     const BYTE *mem = NULL;
2644     GLenum internal;
2645
2646     if (srgb)
2647     {
2648         internal = format->glGammaInternal;
2649     }
2650     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2651     {
2652         internal = format->rtInternal;
2653     }
2654     else
2655     {
2656         internal = format->glInternal;
2657     }
2658
2659     if (!internal)
2660         FIXME("No GL internal format for format %s.\n", debug_d3dformat(format->id));
2661
2662     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2663     {
2664         height *= format->height_scale.numerator;
2665         height /= format->height_scale.denominator;
2666     }
2667
2668     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2669             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2670             internal, width, height, format->glFormat, format->glType);
2671
2672     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2673     {
2674         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2675                 || !surface->resource.allocatedMemory)
2676         {
2677             /* In some cases we want to disable client storage.
2678              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2679              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2680              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2681              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2682              */
2683             gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2684             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2685             surface->flags &= ~SFLAG_CLIENT;
2686             enable_client_storage = TRUE;
2687         }
2688         else
2689         {
2690             surface->flags |= SFLAG_CLIENT;
2691
2692             /* Point OpenGL to our allocated texture memory. Do not use
2693              * resource.allocatedMemory here because it might point into a
2694              * PBO. Instead use heapMemory, but get the alignment right. */
2695             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2696                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2697         }
2698     }
2699
2700     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2701     {
2702         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2703                 internal, width, height, 0, surface->resource.size, mem));
2704         checkGLcall("glCompressedTexImage2DARB");
2705     }
2706     else
2707     {
2708         gl_info->gl_ops.gl.p_glTexImage2D(surface->texture_target, surface->texture_level,
2709                 internal, width, height, 0, format->glFormat, format->glType, mem);
2710         checkGLcall("glTexImage2D");
2711     }
2712
2713     if (enable_client_storage)
2714     {
2715         gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2716         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2717     }
2718 }
2719
2720 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2721  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2722 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2723 /* Context activation is done by the caller. */
2724 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2725 {
2726     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2727     struct wined3d_renderbuffer_entry *entry;
2728     GLuint renderbuffer = 0;
2729     unsigned int src_width, src_height;
2730     unsigned int width, height;
2731
2732     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2733     {
2734         width = rt->pow2Width;
2735         height = rt->pow2Height;
2736     }
2737     else
2738     {
2739         width = surface->pow2Width;
2740         height = surface->pow2Height;
2741     }
2742
2743     src_width = surface->pow2Width;
2744     src_height = surface->pow2Height;
2745
2746     /* A depth stencil smaller than the render target is not valid */
2747     if (width > src_width || height > src_height) return;
2748
2749     /* Remove any renderbuffer set if the sizes match */
2750     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2751             || (width == src_width && height == src_height))
2752     {
2753         surface->current_renderbuffer = NULL;
2754         return;
2755     }
2756
2757     /* Look if we've already got a renderbuffer of the correct dimensions */
2758     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2759     {
2760         if (entry->width == width && entry->height == height)
2761         {
2762             renderbuffer = entry->id;
2763             surface->current_renderbuffer = entry;
2764             break;
2765         }
2766     }
2767
2768     if (!renderbuffer)
2769     {
2770         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2771         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2772         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2773                 surface->resource.format->glInternal, width, height);
2774
2775         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2776         entry->width = width;
2777         entry->height = height;
2778         entry->id = renderbuffer;
2779         list_add_head(&surface->renderbuffers, &entry->entry);
2780
2781         surface->current_renderbuffer = entry;
2782     }
2783
2784     checkGLcall("set_compatible_renderbuffer");
2785 }
2786
2787 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2788 {
2789     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2790
2791     TRACE("surface %p.\n", surface);
2792
2793     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2794     {
2795         ERR("Surface %p is not on a swapchain.\n", surface);
2796         return GL_NONE;
2797     }
2798
2799     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2800     {
2801         if (swapchain->render_to_fbo)
2802         {
2803             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2804             return GL_COLOR_ATTACHMENT0;
2805         }
2806         TRACE("Returning GL_BACK\n");
2807         return GL_BACK;
2808     }
2809     else if (surface == swapchain->front_buffer)
2810     {
2811         TRACE("Returning GL_FRONT\n");
2812         return GL_FRONT;
2813     }
2814
2815     FIXME("Higher back buffer, returning GL_BACK\n");
2816     return GL_BACK;
2817 }
2818
2819 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2820 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2821 {
2822     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2823
2824     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2825         /* No partial locking for textures yet. */
2826         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2827
2828     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2829     if (dirty_rect)
2830     {
2831         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2832         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2833         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2834         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2835     }
2836     else
2837     {
2838         surface->dirtyRect.left = 0;
2839         surface->dirtyRect.top = 0;
2840         surface->dirtyRect.right = surface->resource.width;
2841         surface->dirtyRect.bottom = surface->resource.height;
2842     }
2843
2844     /* if the container is a texture then mark it dirty. */
2845     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2846     {
2847         TRACE("Passing to container.\n");
2848         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2849     }
2850 }
2851
2852 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2853 {
2854     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2855     BOOL ck_changed;
2856
2857     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2858
2859     if (surface->resource.pool == WINED3D_POOL_SCRATCH)
2860     {
2861         ERR("Not supported on scratch surfaces.\n");
2862         return WINED3DERR_INVALIDCALL;
2863     }
2864
2865     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2866
2867     /* Reload if either the texture and sysmem have different ideas about the
2868      * color key, or the actual key values changed. */
2869     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2870             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2871             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2872     {
2873         TRACE("Reloading because of color keying\n");
2874         /* To perform the color key conversion we need a sysmem copy of
2875          * the surface. Make sure we have it. */
2876
2877         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2878         /* Make sure the texture is reloaded because of the color key change,
2879          * this kills performance though :( */
2880         /* TODO: This is not necessarily needed with hw palettized texture support. */
2881         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2882         /* Switching color keying on / off may change the internal format. */
2883         if (ck_changed)
2884             surface_force_reload(surface);
2885     }
2886     else if (!(surface->flags & flag))
2887     {
2888         TRACE("Reloading because surface is dirty.\n");
2889     }
2890     else
2891     {
2892         TRACE("surface is already in texture\n");
2893         return WINED3D_OK;
2894     }
2895
2896     /* No partial locking for textures yet. */
2897     surface_load_location(surface, flag, NULL);
2898     surface_evict_sysmem(surface);
2899
2900     return WINED3D_OK;
2901 }
2902
/* Convert a 32-bit float to the bit pattern of a 16-bit (half precision)
 * float: 1 sign bit, 5 exponent bits (excess 15), 10 mantissa bits.
 *
 * Zero (of either sign) yields 0x0000, NaN yields 0x7c01, infinities and
 * values too large for a half float yield +/-INF, values too small yield
 * denormals or zero. The mantissa is rounded to nearest, away from zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    /* With the implicit leading one, a normalized half float mantissa has
     * 11 bits, i.e. it lies in [2^10, 2^11). */
    const float mantissa_min = 1024.0f;
    const float mantissa_end = 2048.0f;
    int exponent = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), keeping track of the power of two
     * that was taken out. At most one of the loops does any work. */
    while (tmp < mantissa_min)
    {
        tmp *= 2.0f;
        --exponent;
    }
    while (tmp >= mantissa_end)
    {
        tmp /= 2.0f;
        ++exponent;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding may carry out of the 11 bit mantissa (2047.5 -> 2048). The
     * result is then the next power of two, so renormalize; otherwise the
     * masking below would silently turn the mantissa into zero without
     * adjusting the exponent. */
    if (mantissa >= 0x800)
    {
        mantissa >>= 1;
        ++exponent;
    }

    exponent += 10;  /* Normalize the mantissa. */
    exponent += 15;  /* Exponent is encoded with excess 15. */

    if (exponent > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exponent <= 0)
    {
        /* exponent == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exponent <= 0)
        {
            mantissa = mantissa >> 1;
            ++exponent;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (unsigned short)((exponent << 10) | (mantissa & 0x3ff));
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2965
2966 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2967 {
2968     ULONG refcount;
2969
2970     TRACE("Surface %p, container %p of type %#x.\n",
2971             surface, surface->container.u.base, surface->container.type);
2972
2973     switch (surface->container.type)
2974     {
2975         case WINED3D_CONTAINER_TEXTURE:
2976             return wined3d_texture_incref(surface->container.u.texture);
2977
2978         case WINED3D_CONTAINER_SWAPCHAIN:
2979             return wined3d_swapchain_incref(surface->container.u.swapchain);
2980
2981         default:
2982             ERR("Unhandled container type %#x.\n", surface->container.type);
2983         case WINED3D_CONTAINER_NONE:
2984             break;
2985     }
2986
2987     refcount = InterlockedIncrement(&surface->resource.ref);
2988     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2989
2990     return refcount;
2991 }
2992
2993 /* Do not call while under the GL lock. */
2994 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2995 {
2996     ULONG refcount;
2997
2998     TRACE("Surface %p, container %p of type %#x.\n",
2999             surface, surface->container.u.base, surface->container.type);
3000
3001     switch (surface->container.type)
3002     {
3003         case WINED3D_CONTAINER_TEXTURE:
3004             return wined3d_texture_decref(surface->container.u.texture);
3005
3006         case WINED3D_CONTAINER_SWAPCHAIN:
3007             return wined3d_swapchain_decref(surface->container.u.swapchain);
3008
3009         default:
3010             ERR("Unhandled container type %#x.\n", surface->container.type);
3011         case WINED3D_CONTAINER_NONE:
3012             break;
3013     }
3014
3015     refcount = InterlockedDecrement(&surface->resource.ref);
3016     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
3017
3018     if (!refcount)
3019     {
3020         surface_cleanup(surface);
3021         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
3022
3023         TRACE("Destroyed surface %p.\n", surface);
3024         HeapFree(GetProcessHeap(), 0, surface);
3025     }
3026
3027     return refcount;
3028 }
3029
3030 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
3031 {
3032     return resource_set_priority(&surface->resource, priority);
3033 }
3034
3035 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
3036 {
3037     return resource_get_priority(&surface->resource);
3038 }
3039
3040 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
3041 {
3042     TRACE("surface %p.\n", surface);
3043
3044     if (!surface->resource.device->d3d_initialized)
3045     {
3046         ERR("D3D not initialized.\n");
3047         return;
3048     }
3049
3050     surface_internal_preload(surface, SRGB_ANY);
3051 }
3052
3053 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
3054 {
3055     TRACE("surface %p.\n", surface);
3056
3057     return surface->resource.parent;
3058 }
3059
3060 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
3061 {
3062     TRACE("surface %p.\n", surface);
3063
3064     return &surface->resource;
3065 }
3066
3067 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
3068 {
3069     TRACE("surface %p, flags %#x.\n", surface, flags);
3070
3071     switch (flags)
3072     {
3073         case WINEDDGBS_CANBLT:
3074         case WINEDDGBS_ISBLTDONE:
3075             return WINED3D_OK;
3076
3077         default:
3078             return WINED3DERR_INVALIDCALL;
3079     }
3080 }
3081
3082 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
3083 {
3084     TRACE("surface %p, flags %#x.\n", surface, flags);
3085
3086     /* XXX: DDERR_INVALIDSURFACETYPE */
3087
3088     switch (flags)
3089     {
3090         case WINEDDGFS_CANFLIP:
3091         case WINEDDGFS_ISFLIPDONE:
3092             return WINED3D_OK;
3093
3094         default:
3095             return WINED3DERR_INVALIDCALL;
3096     }
3097 }
3098
3099 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
3100 {
3101     TRACE("surface %p.\n", surface);
3102
3103     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
3104     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
3105 }
3106
3107 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
3108 {
3109     TRACE("surface %p.\n", surface);
3110
3111     surface->flags &= ~SFLAG_LOST;
3112     return WINED3D_OK;
3113 }
3114
3115 void CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
3116 {
3117     TRACE("surface %p, palette %p.\n", surface, palette);
3118
3119     if (surface->palette == palette)
3120     {
3121         TRACE("Nop palette change.\n");
3122         return;
3123     }
3124
3125     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3126         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3127
3128     surface->palette = palette;
3129
3130     if (palette)
3131     {
3132         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3133             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3134
3135         surface->surface_ops->surface_realize_palette(surface);
3136     }
3137 }
3138
3139 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3140         DWORD flags, const struct wined3d_color_key *color_key)
3141 {
3142     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3143
3144     if (flags & WINEDDCKEY_COLORSPACE)
3145     {
3146         FIXME(" colorkey value not supported (%08x) !\n", flags);
3147         return WINED3DERR_INVALIDCALL;
3148     }
3149
3150     /* Dirtify the surface, but only if a key was changed. */
3151     if (color_key)
3152     {
3153         switch (flags & ~WINEDDCKEY_COLORSPACE)
3154         {
3155             case WINEDDCKEY_DESTBLT:
3156                 surface->dst_blt_color_key = *color_key;
3157                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3158                 break;
3159
3160             case WINEDDCKEY_DESTOVERLAY:
3161                 surface->dst_overlay_color_key = *color_key;
3162                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3163                 break;
3164
3165             case WINEDDCKEY_SRCOVERLAY:
3166                 surface->src_overlay_color_key = *color_key;
3167                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3168                 break;
3169
3170             case WINEDDCKEY_SRCBLT:
3171                 surface->src_blt_color_key = *color_key;
3172                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3173                 break;
3174         }
3175     }
3176     else
3177     {
3178         switch (flags & ~WINEDDCKEY_COLORSPACE)
3179         {
3180             case WINEDDCKEY_DESTBLT:
3181                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3182                 break;
3183
3184             case WINEDDCKEY_DESTOVERLAY:
3185                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3186                 break;
3187
3188             case WINEDDCKEY_SRCOVERLAY:
3189                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3190                 break;
3191
3192             case WINEDDCKEY_SRCBLT:
3193                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3194                 break;
3195         }
3196     }
3197
3198     return WINED3D_OK;
3199 }
3200
3201 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3202 {
3203     TRACE("surface %p.\n", surface);
3204
3205     return surface->palette;
3206 }
3207
3208 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3209 {
3210     const struct wined3d_format *format = surface->resource.format;
3211     DWORD pitch;
3212
3213     TRACE("surface %p.\n", surface);
3214
3215     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3216     {
3217         /* Since compressed formats are block based, pitch means the amount of
3218          * bytes to the next row of block rather than the next row of pixels. */
3219         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3220         pitch = row_block_count * format->block_byte_count;
3221     }
3222     else
3223     {
3224         unsigned char alignment = surface->resource.device->surface_alignment;
3225         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3226         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3227     }
3228
3229     TRACE("Returning %u.\n", pitch);
3230
3231     return pitch;
3232 }
3233
3234 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3235 {
3236     TRACE("surface %p, mem %p.\n", surface, mem);
3237
3238     if (surface->resource.map_count || (surface->flags & SFLAG_DCINUSE))
3239     {
3240         WARN("Surface is mapped or the DC is in use.\n");
3241         return WINED3DERR_INVALIDCALL;
3242     }
3243
3244     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3245     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3246     {
3247         ERR("Not supported on render targets.\n");
3248         return WINED3DERR_INVALIDCALL;
3249     }
3250
3251     if (mem && mem != surface->resource.allocatedMemory)
3252     {
3253         void *release = NULL;
3254
3255         /* Do I have to copy the old surface content? */
3256         if (surface->flags & SFLAG_DIBSECTION)
3257         {
3258             DeleteDC(surface->hDC);
3259             DeleteObject(surface->dib.DIBsection);
3260             surface->dib.bitmap_data = NULL;
3261             surface->resource.allocatedMemory = NULL;
3262             surface->hDC = NULL;
3263             surface->flags &= ~SFLAG_DIBSECTION;
3264         }
3265         else if (!(surface->flags & SFLAG_USERPTR))
3266         {
3267             release = surface->resource.heapMemory;
3268             surface->resource.heapMemory = NULL;
3269         }
3270         surface->resource.allocatedMemory = mem;
3271         surface->flags |= SFLAG_USERPTR;
3272
3273         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3274         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3275
3276         /* For client textures OpenGL has to be notified. */
3277         if (surface->flags & SFLAG_CLIENT)
3278             surface_release_client_storage(surface);
3279
3280         /* Now free the old memory if any. */
3281         HeapFree(GetProcessHeap(), 0, release);
3282     }
3283     else if (surface->flags & SFLAG_USERPTR)
3284     {
3285         /* HeapMemory should be NULL already. */
3286         if (surface->resource.heapMemory)
3287             ERR("User pointer surface has heap memory allocated.\n");
3288
3289         if (!mem)
3290         {
3291             surface->resource.allocatedMemory = NULL;
3292             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3293
3294             if (surface->flags & SFLAG_CLIENT)
3295                 surface_release_client_storage(surface);
3296
3297             surface_prepare_system_memory(surface);
3298         }
3299
3300         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3301     }
3302
3303     return WINED3D_OK;
3304 }
3305
3306 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3307 {
3308     LONG w, h;
3309
3310     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3311
3312     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3313     {
3314         WARN("Not an overlay surface.\n");
3315         return WINEDDERR_NOTAOVERLAYSURFACE;
3316     }
3317
3318     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3319     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3320     surface->overlay_destrect.left = x;
3321     surface->overlay_destrect.top = y;
3322     surface->overlay_destrect.right = x + w;
3323     surface->overlay_destrect.bottom = y + h;
3324
3325     surface_draw_overlay(surface);
3326
3327     return WINED3D_OK;
3328 }
3329
3330 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3331 {
3332     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3333
3334     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3335     {
3336         TRACE("Not an overlay surface.\n");
3337         return WINEDDERR_NOTAOVERLAYSURFACE;
3338     }
3339
3340     if (!surface->overlay_dest)
3341     {
3342         TRACE("Overlay not visible.\n");
3343         *x = 0;
3344         *y = 0;
3345         return WINEDDERR_OVERLAYNOTVISIBLE;
3346     }
3347
3348     *x = surface->overlay_destrect.left;
3349     *y = surface->overlay_destrect.top;
3350
3351     TRACE("Returning position %d, %d.\n", *x, *y);
3352
3353     return WINED3D_OK;
3354 }
3355
3356 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3357         DWORD flags, struct wined3d_surface *ref)
3358 {
3359     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3360
3361     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3362     {
3363         TRACE("Not an overlay surface.\n");
3364         return WINEDDERR_NOTAOVERLAYSURFACE;
3365     }
3366
3367     return WINED3D_OK;
3368 }
3369
3370 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3371         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3372 {
3373     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3374             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3375
3376     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3377     {
3378         WARN("Not an overlay surface.\n");
3379         return WINEDDERR_NOTAOVERLAYSURFACE;
3380     }
3381     else if (!dst_surface)
3382     {
3383         WARN("Dest surface is NULL.\n");
3384         return WINED3DERR_INVALIDCALL;
3385     }
3386
3387     if (src_rect)
3388     {
3389         surface->overlay_srcrect = *src_rect;
3390     }
3391     else
3392     {
3393         surface->overlay_srcrect.left = 0;
3394         surface->overlay_srcrect.top = 0;
3395         surface->overlay_srcrect.right = surface->resource.width;
3396         surface->overlay_srcrect.bottom = surface->resource.height;
3397     }
3398
3399     if (dst_rect)
3400     {
3401         surface->overlay_destrect = *dst_rect;
3402     }
3403     else
3404     {
3405         surface->overlay_destrect.left = 0;
3406         surface->overlay_destrect.top = 0;
3407         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3408         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3409     }
3410
3411     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3412     {
3413         surface->overlay_dest = NULL;
3414         list_remove(&surface->overlay_entry);
3415     }
3416
3417     if (flags & WINEDDOVER_SHOW)
3418     {
3419         if (surface->overlay_dest != dst_surface)
3420         {
3421             surface->overlay_dest = dst_surface;
3422             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3423         }
3424     }
3425     else if (flags & WINEDDOVER_HIDE)
3426     {
3427         /* tests show that the rectangles are erased on hide */
3428         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3429         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3430         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3431         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3432         surface->overlay_dest = NULL;
3433     }
3434
3435     surface_draw_overlay(surface);
3436
3437     return WINED3D_OK;
3438 }
3439
3440 HRESULT CDECL wined3d_surface_update_desc(struct wined3d_surface *surface,
3441         UINT width, UINT height, enum wined3d_format_id format_id,
3442         enum wined3d_multisample_type multisample_type, UINT multisample_quality)
3443 {
3444     struct wined3d_device *device = surface->resource.device;
3445     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3446     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
3447     UINT resource_size = wined3d_format_calculate_size(format, device->surface_alignment, width, height);
3448
3449     TRACE("surface %p, width %u, height %u, format %s, multisample_type %#x, multisample_quality %u.\n",
3450             surface, width, height, debug_d3dformat(format_id), multisample_type, multisample_type);
3451
3452     if (!resource_size)
3453         return WINED3DERR_INVALIDCALL;
3454
3455     if (device->d3d_initialized)
3456         surface->resource.resource_ops->resource_unload(&surface->resource);
3457
3458     if (surface->flags & SFLAG_DIBSECTION)
3459     {
3460         DeleteDC(surface->hDC);
3461         DeleteObject(surface->dib.DIBsection);
3462         surface->dib.bitmap_data = NULL;
3463         surface->flags &= ~SFLAG_DIBSECTION;
3464     }
3465
3466     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_USERPTR);
3467     surface->resource.allocatedMemory = NULL;
3468     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3469     surface->resource.heapMemory = NULL;
3470
3471     surface->resource.width = width;
3472     surface->resource.height = height;
3473     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[ARB_TEXTURE_RECTANGLE]
3474             || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
3475     {
3476         surface->pow2Width = width;
3477         surface->pow2Height = height;
3478     }
3479     else
3480     {
3481         surface->pow2Width = surface->pow2Height = 1;
3482         while (surface->pow2Width < width)
3483             surface->pow2Width <<= 1;
3484         while (surface->pow2Height < height)
3485             surface->pow2Height <<= 1;
3486     }
3487
3488     if (surface->pow2Width != width || surface->pow2Height != height)
3489         surface->flags |= SFLAG_NONPOW2;
3490     else
3491         surface->flags &= ~SFLAG_NONPOW2;
3492
3493     surface->resource.format = format;
3494     surface->resource.multisample_type = multisample_type;
3495     surface->resource.multisample_quality = multisample_quality;
3496     surface->resource.size = resource_size;
3497
3498     if (!surface_init_sysmem(surface))
3499         return E_OUTOFMEMORY;
3500
3501     return WINED3D_OK;
3502 }
3503
3504 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3505         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3506 {
3507     unsigned short *dst_s;
3508     const float *src_f;
3509     unsigned int x, y;
3510
3511     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3512
3513     for (y = 0; y < h; ++y)
3514     {
3515         src_f = (const float *)(src + y * pitch_in);
3516         dst_s = (unsigned short *) (dst + y * pitch_out);
3517         for (x = 0; x < w; ++x)
3518         {
3519             dst_s[x] = float_32_to_16(src_f + x);
3520         }
3521     }
3522 }
3523
3524 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3525         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3526 {
3527     static const unsigned char convert_5to8[] =
3528     {
3529         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3530         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3531         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3532         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3533     };
3534     static const unsigned char convert_6to8[] =
3535     {
3536         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3537         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3538         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3539         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3540         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3541         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3542         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3543         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3544     };
3545     unsigned int x, y;
3546
3547     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3548
3549     for (y = 0; y < h; ++y)
3550     {
3551         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3552         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3553         for (x = 0; x < w; ++x)
3554         {
3555             WORD pixel = src_line[x];
3556             dst_line[x] = 0xff000000
3557                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3558                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3559                     | convert_5to8[(pixel & 0x001f)];
3560         }
3561     }
3562 }
3563
3564 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3565  * in both cases we're just setting the X / Alpha channel to 0xff. */
3566 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3567         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3568 {
3569     unsigned int x, y;
3570
3571     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3572
3573     for (y = 0; y < h; ++y)
3574     {
3575         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3576         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3577
3578         for (x = 0; x < w; ++x)
3579         {
3580             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3581         }
3582     }
3583 }
3584
3585 static inline BYTE cliptobyte(int x)
3586 {
3587     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3588 }
3589
3590 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3591         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3592 {
3593     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3594     unsigned int x, y;
3595
3596     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3597
3598     for (y = 0; y < h; ++y)
3599     {
3600         const BYTE *src_line = src + y * pitch_in;
3601         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3602         for (x = 0; x < w; ++x)
3603         {
3604             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3605              *     C = Y - 16; D = U - 128; E = V - 128;
3606              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3607              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3608              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3609              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3610              * U and V are shared between the pixels. */
3611             if (!(x & 1)) /* For every even pixel, read new U and V. */
3612             {
3613                 d = (int) src_line[1] - 128;
3614                 e = (int) src_line[3] - 128;
3615                 r2 = 409 * e + 128;
3616                 g2 = - 100 * d - 208 * e + 128;
3617                 b2 = 516 * d + 128;
3618             }
3619             c2 = 298 * ((int) src_line[0] - 16);
3620             dst_line[x] = 0xff000000
3621                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3622                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3623                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3624                 /* Scale RGB values to 0..255 range,
3625                  * then clip them if still not in range (may be negative),
3626                  * then shift them within DWORD if necessary. */
3627             src_line += 2;
3628         }
3629     }
3630 }
3631
3632 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3633         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3634 {
3635     unsigned int x, y;
3636     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3637
3638     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3639
3640     for (y = 0; y < h; ++y)
3641     {
3642         const BYTE *src_line = src + y * pitch_in;
3643         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3644         for (x = 0; x < w; ++x)
3645         {
3646             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3647              *     C = Y - 16; D = U - 128; E = V - 128;
3648              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3649              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3650              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3651              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3652              * U and V are shared between the pixels. */
3653             if (!(x & 1)) /* For every even pixel, read new U and V. */
3654             {
3655                 d = (int) src_line[1] - 128;
3656                 e = (int) src_line[3] - 128;
3657                 r2 = 409 * e + 128;
3658                 g2 = - 100 * d - 208 * e + 128;
3659                 b2 = 516 * d + 128;
3660             }
3661             c2 = 298 * ((int) src_line[0] - 16);
3662             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3663                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3664                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3665                 /* Scale RGB values to 0..255 range,
3666                  * then clip them if still not in range (may be negative),
3667                  * then shift them within DWORD if necessary. */
3668             src_line += 2;
3669         }
3670     }
3671 }
3672
/* Describes one supported CPU format conversion: the source and destination
 * format ids, and the routine that converts a w x h image between them.
 * The routine takes source/destination pointers and their pitches in bytes. */
struct d3dfmt_converter_desc
{
    enum wined3d_format_id from, to;
    void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
};
3678
/* Table of the available format conversions, searched linearly by
 * find_converter(). Each entry is {source format, destination format,
 * conversion routine}. */
static const struct d3dfmt_converter_desc converters[] =
{
    {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
    {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
    {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
};
3688
3689 static inline const struct d3dfmt_converter_desc *find_converter(enum wined3d_format_id from,
3690         enum wined3d_format_id to)
3691 {
3692     unsigned int i;
3693
3694     for (i = 0; i < (sizeof(converters) / sizeof(*converters)); ++i)
3695     {
3696         if (converters[i].from == from && converters[i].to == to)
3697             return &converters[i];
3698     }
3699
3700     return NULL;
3701 }
3702
3703 /*****************************************************************************
3704  * surface_convert_format
3705  *
3706  * Creates a duplicate of a surface in a different format. Is used by Blt to
3707  * blit between surfaces with different formats.
3708  *
3709  * Parameters
3710  *  source: Source surface
3711  *  fmt: Requested destination format
3712  *
3713  *****************************************************************************/
3714 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3715 {
3716     struct wined3d_map_desc src_map, dst_map;
3717     const struct d3dfmt_converter_desc *conv;
3718     struct wined3d_surface *ret = NULL;
3719     HRESULT hr;
3720
3721     conv = find_converter(source->resource.format->id, to_fmt);
3722     if (!conv)
3723     {
3724         FIXME("Cannot find a conversion function from format %s to %s.\n",
3725                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3726         return NULL;
3727     }
3728
3729     /* FIXME: Multisampled conversion? */
3730     if (FAILED(hr = wined3d_surface_create(source->resource.device, source->resource.width, source->resource.height,
3731             to_fmt, 0, WINED3D_POOL_SCRATCH, WINED3D_MULTISAMPLE_NONE, 0,
3732             WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD, NULL, &wined3d_null_parent_ops, &ret)))
3733     {
3734         ERR("Failed to create a destination surface for conversion.\n");
3735         return NULL;
3736     }
3737
3738     memset(&src_map, 0, sizeof(src_map));
3739     memset(&dst_map, 0, sizeof(dst_map));
3740
3741     if (FAILED(hr = wined3d_surface_map(source, &src_map, NULL, WINED3D_MAP_READONLY)))
3742     {
3743         ERR("Failed to lock the source surface.\n");
3744         wined3d_surface_decref(ret);
3745         return NULL;
3746     }
3747     if (FAILED(hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3D_MAP_READONLY)))
3748     {
3749         ERR("Failed to lock the destination surface.\n");
3750         wined3d_surface_unmap(source);
3751         wined3d_surface_decref(ret);
3752         return NULL;
3753     }
3754
3755     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3756             source->resource.width, source->resource.height);
3757
3758     wined3d_surface_unmap(ret);
3759     wined3d_surface_unmap(source);
3760
3761     return ret;
3762 }
3763
3764 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3765         unsigned int bpp, UINT pitch, DWORD color)
3766 {
3767     BYTE *first;
3768     unsigned int x, y;
3769
3770     /* Do first row */
3771
3772 #define COLORFILL_ROW(type) \
3773 do { \
3774     type *d = (type *)buf; \
3775     for (x = 0; x < width; ++x) \
3776         d[x] = (type)color; \
3777 } while(0)
3778
3779     switch (bpp)
3780     {
3781         case 1:
3782             COLORFILL_ROW(BYTE);
3783             break;
3784
3785         case 2:
3786             COLORFILL_ROW(WORD);
3787             break;
3788
3789         case 3:
3790         {
3791             BYTE *d = buf;
3792             for (x = 0; x < width; ++x, d += 3)
3793             {
3794                 d[0] = (color      ) & 0xff;
3795                 d[1] = (color >>  8) & 0xff;
3796                 d[2] = (color >> 16) & 0xff;
3797             }
3798             break;
3799         }
3800         case 4:
3801             COLORFILL_ROW(DWORD);
3802             break;
3803
3804         default:
3805             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3806             return WINED3DERR_NOTAVAILABLE;
3807     }
3808
3809 #undef COLORFILL_ROW
3810
3811     /* Now copy first row. */
3812     first = buf;
3813     for (y = 1; y < height; ++y)
3814     {
3815         buf += pitch;
3816         memcpy(buf, first, width * bpp);
3817     }
3818
3819     return WINED3D_OK;
3820 }
3821
3822 struct wined3d_surface * CDECL wined3d_surface_from_resource(struct wined3d_resource *resource)
3823 {
3824     return surface_from_resource(resource);
3825 }
3826
3827 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3828 {
3829     TRACE("surface %p.\n", surface);
3830
3831     if (!surface->resource.map_count)
3832     {
3833         WARN("Trying to unmap unmapped surface.\n");
3834         return WINEDDERR_NOTLOCKED;
3835     }
3836     --surface->resource.map_count;
3837
3838     surface->surface_ops->surface_unmap(surface);
3839
3840     return WINED3D_OK;
3841 }
3842
3843 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3844         struct wined3d_map_desc *map_desc, const RECT *rect, DWORD flags)
3845 {
3846     const struct wined3d_format *format = surface->resource.format;
3847
3848     TRACE("surface %p, map_desc %p, rect %s, flags %#x.\n",
3849             surface, map_desc, wine_dbgstr_rect(rect), flags);
3850
3851     if (surface->resource.map_count)
3852     {
3853         WARN("Surface is already mapped.\n");
3854         return WINED3DERR_INVALIDCALL;
3855     }
3856
3857     if ((format->flags & WINED3DFMT_FLAG_BLOCKS) && rect
3858             && !surface_check_block_align(surface, rect))
3859     {
3860         WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3861                 wine_dbgstr_rect(rect), format->block_width, format->block_height);
3862
3863         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3864             return WINED3DERR_INVALIDCALL;
3865     }
3866
3867     ++surface->resource.map_count;
3868
3869     if (!(surface->flags & SFLAG_LOCKABLE))
3870         WARN("Trying to lock unlockable surface.\n");
3871
3872     /* Performance optimization: Count how often a surface is mapped, if it is
3873      * mapped regularly do not throw away the system memory copy. This avoids
3874      * the need to download the surface from OpenGL all the time. The surface
3875      * is still downloaded if the OpenGL texture is changed. */
3876     if (!(surface->flags & SFLAG_DYNLOCK))
3877     {
3878         if (++surface->lockCount > MAXLOCKCOUNT)
3879         {
3880             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3881             surface->flags |= SFLAG_DYNLOCK;
3882         }
3883     }
3884
3885     surface->surface_ops->surface_map(surface, rect, flags);
3886
3887     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3888         map_desc->row_pitch = surface->resource.width * format->byte_count;
3889     else
3890         map_desc->row_pitch = wined3d_surface_get_pitch(surface);
3891     map_desc->slice_pitch = 0;
3892
3893     if (!rect)
3894     {
3895         map_desc->data = surface->resource.allocatedMemory;
3896         surface->lockedRect.left = 0;
3897         surface->lockedRect.top = 0;
3898         surface->lockedRect.right = surface->resource.width;
3899         surface->lockedRect.bottom = surface->resource.height;
3900     }
3901     else
3902     {
3903         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3904         {
3905             /* Compressed textures are block based, so calculate the offset of
3906              * the block that contains the top-left pixel of the locked rectangle. */
3907             map_desc->data = surface->resource.allocatedMemory
3908                     + ((rect->top / format->block_height) * map_desc->row_pitch)
3909                     + ((rect->left / format->block_width) * format->block_byte_count);
3910         }
3911         else
3912         {
3913             map_desc->data = surface->resource.allocatedMemory
3914                     + (map_desc->row_pitch * rect->top)
3915                     + (rect->left * format->byte_count);
3916         }
3917         surface->lockedRect.left = rect->left;
3918         surface->lockedRect.top = rect->top;
3919         surface->lockedRect.right = rect->right;
3920         surface->lockedRect.bottom = rect->bottom;
3921     }
3922
3923     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3924     TRACE("Returning memory %p, pitch %u.\n", map_desc->data, map_desc->row_pitch);
3925
3926     return WINED3D_OK;
3927 }
3928
3929 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3930 {
3931     struct wined3d_map_desc map;
3932     HRESULT hr;
3933
3934     TRACE("surface %p, dc %p.\n", surface, dc);
3935
3936     if (surface->flags & SFLAG_USERPTR)
3937     {
3938         ERR("Not supported on surfaces with application-provided memory.\n");
3939         return WINEDDERR_NODC;
3940     }
3941
3942     /* Give more detailed info for ddraw. */
3943     if (surface->flags & SFLAG_DCINUSE)
3944         return WINEDDERR_DCALREADYCREATED;
3945
3946     /* Can't GetDC if the surface is locked. */
3947     if (surface->resource.map_count)
3948         return WINED3DERR_INVALIDCALL;
3949
3950     /* Create a DIB section if there isn't a dc yet. */
3951     if (!surface->hDC)
3952     {
3953         if (surface->flags & SFLAG_CLIENT)
3954         {
3955             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3956             surface_release_client_storage(surface);
3957         }
3958         hr = surface_create_dib_section(surface);
3959         if (FAILED(hr))
3960             return WINED3DERR_INVALIDCALL;
3961
3962         /* Use the DIB section from now on if we are not using a PBO. */
3963         if (!(surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)))
3964         {
3965             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3966             surface->resource.heapMemory = NULL;
3967             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3968         }
3969     }
3970
3971     /* Map the surface. */
3972     hr = wined3d_surface_map(surface, &map, NULL, 0);
3973     if (FAILED(hr))
3974     {
3975         ERR("Map failed, hr %#x.\n", hr);
3976         return hr;
3977     }
3978
3979     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3980      * activates the allocatedMemory. */
3981     if (surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM))
3982         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3983
3984     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3985             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3986     {
3987         /* GetDC on palettized formats is unsupported in D3D9, and the method
3988          * is missing in D3D8, so this should only be used for DX <=7
3989          * surfaces (with non-device palettes). */
3990         const PALETTEENTRY *pal = NULL;
3991
3992         if (surface->palette)
3993         {
3994             pal = surface->palette->palents;
3995         }
3996         else
3997         {
3998             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3999             struct wined3d_surface *dds_primary = swapchain->front_buffer;
4000
4001             if (dds_primary && dds_primary->palette)
4002                 pal = dds_primary->palette->palents;
4003         }
4004
4005         if (pal)
4006         {
4007             RGBQUAD col[256];
4008             unsigned int i;
4009
4010             for (i = 0; i < 256; ++i)
4011             {
4012                 col[i].rgbRed = pal[i].peRed;
4013                 col[i].rgbGreen = pal[i].peGreen;
4014                 col[i].rgbBlue = pal[i].peBlue;
4015                 col[i].rgbReserved = 0;
4016             }
4017             SetDIBColorTable(surface->hDC, 0, 256, col);
4018         }
4019     }
4020
4021     surface->flags |= SFLAG_DCINUSE;
4022
4023     *dc = surface->hDC;
4024     TRACE("Returning dc %p.\n", *dc);
4025
4026     return WINED3D_OK;
4027 }
4028
4029 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
4030 {
4031     TRACE("surface %p, dc %p.\n", surface, dc);
4032
4033     if (!(surface->flags & SFLAG_DCINUSE))
4034         return WINEDDERR_NODC;
4035
4036     if (surface->hDC != dc)
4037     {
4038         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
4039                 dc, surface->hDC);
4040         return WINEDDERR_NODC;
4041     }
4042
4043     /* Copy the contents of the DIB over to the PBO. */
4044     if ((surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)) && surface->resource.allocatedMemory)
4045         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
4046
4047     /* We locked first, so unlock now. */
4048     wined3d_surface_unmap(surface);
4049
4050     surface->flags &= ~SFLAG_DCINUSE;
4051
4052     return WINED3D_OK;
4053 }
4054
4055 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
4056 {
4057     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
4058
4059     if (flags)
4060     {
4061         static UINT once;
4062         if (!once++)
4063             FIXME("Ignoring flags %#x.\n", flags);
4064         else
4065             WARN("Ignoring flags %#x.\n", flags);
4066     }
4067
4068     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4069     {
4070         ERR("Not supported on swapchain surfaces.\n");
4071         return WINEDDERR_NOTFLIPPABLE;
4072     }
4073
4074     /* Flipping is only supported on render targets and overlays. */
4075     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
4076     {
4077         WARN("Tried to flip a non-render target, non-overlay surface.\n");
4078         return WINEDDERR_NOTFLIPPABLE;
4079     }
4080
4081     flip_surface(surface, override);
4082
4083     /* Update overlays if they're visible. */
4084     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
4085         return surface_draw_overlay(surface);
4086
4087     return WINED3D_OK;
4088 }
4089
4090 /* Do not call while under the GL lock. */
4091 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
4092 {
4093     struct wined3d_device *device = surface->resource.device;
4094
4095     TRACE("iface %p, srgb %#x.\n", surface, srgb);
4096
4097     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4098     {
4099         struct wined3d_texture *texture = surface->container.u.texture;
4100
4101         TRACE("Passing to container (%p).\n", texture);
4102         texture->texture_ops->texture_preload(texture, srgb);
4103     }
4104     else
4105     {
4106         struct wined3d_context *context;
4107
4108         TRACE("(%p) : About to load surface\n", surface);
4109
4110         /* TODO: Use already acquired context when possible. */
4111         context = context_acquire(device, NULL);
4112
4113         surface_load(surface, srgb == SRGB_SRGB);
4114
4115         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
4116         {
4117             /* Tell opengl to try and keep this texture in video ram (well mostly) */
4118             GLclampf tmp;
4119             tmp = 0.9f;
4120             context->gl_info->gl_ops.gl.p_glPrioritizeTextures(1, &surface->texture_name, &tmp);
4121         }
4122
4123         context_release(context);
4124     }
4125 }
4126
4127 /* Read the framebuffer back into the surface */
4128 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4129 {
4130     struct wined3d_device *device = surface->resource.device;
4131     const struct wined3d_gl_info *gl_info;
4132     struct wined3d_context *context;
4133     BYTE *mem;
4134     GLint fmt;
4135     GLint type;
4136     BYTE *row, *top, *bottom;
4137     int i;
4138     BOOL bpp;
4139     RECT local_rect;
4140     BOOL srcIsUpsideDown;
4141     GLint rowLen = 0;
4142     GLint skipPix = 0;
4143     GLint skipRow = 0;
4144
4145     context = context_acquire(device, surface);
4146     context_apply_blit_state(context, device);
4147     gl_info = context->gl_info;
4148
4149     /* Select the correct read buffer, and give some debug output.
4150      * There is no need to keep track of the current read buffer or reset it, every part of the code
4151      * that reads sets the read buffer as desired.
4152      */
4153     if (surface_is_offscreen(surface))
4154     {
4155         /* Mapping the primary render target which is not on a swapchain.
4156          * Read from the back buffer. */
4157         TRACE("Mapping offscreen render target.\n");
4158         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
4159         srcIsUpsideDown = TRUE;
4160     }
4161     else
4162     {
4163         /* Onscreen surfaces are always part of a swapchain */
4164         GLenum buffer = surface_get_gl_buffer(surface);
4165         TRACE("Mapping %#x buffer.\n", buffer);
4166         gl_info->gl_ops.gl.p_glReadBuffer(buffer);
4167         checkGLcall("glReadBuffer");
4168         srcIsUpsideDown = FALSE;
4169     }
4170
4171     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4172     if (!rect)
4173     {
4174         local_rect.left = 0;
4175         local_rect.top = 0;
4176         local_rect.right = surface->resource.width;
4177         local_rect.bottom = surface->resource.height;
4178     }
4179     else
4180     {
4181         local_rect = *rect;
4182     }
4183     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4184
4185     switch (surface->resource.format->id)
4186     {
4187         case WINED3DFMT_P8_UINT:
4188         {
4189             if (primary_render_target_is_p8(device))
4190             {
4191                 /* In case of P8 render targets the index is stored in the alpha component */
4192                 fmt = GL_ALPHA;
4193                 type = GL_UNSIGNED_BYTE;
4194                 mem = dest;
4195                 bpp = surface->resource.format->byte_count;
4196             }
4197             else
4198             {
4199                 /* GL can't return palettized data, so read ARGB pixels into a
4200                  * separate block of memory and convert them into palettized format
4201                  * in software. Slow, but if the app means to use palettized render
4202                  * targets and locks it...
4203                  *
4204                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4205                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4206                  * for the color channels when palettizing the colors.
4207                  */
4208                 fmt = GL_RGB;
4209                 type = GL_UNSIGNED_BYTE;
4210                 pitch *= 3;
4211                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4212                 if (!mem)
4213                 {
4214                     ERR("Out of memory\n");
4215                     return;
4216                 }
4217                 bpp = surface->resource.format->byte_count * 3;
4218             }
4219         }
4220         break;
4221
4222         default:
4223             mem = dest;
4224             fmt = surface->resource.format->glFormat;
4225             type = surface->resource.format->glType;
4226             bpp = surface->resource.format->byte_count;
4227     }
4228
4229     if (surface->flags & SFLAG_PBO)
4230     {
4231         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4232         checkGLcall("glBindBufferARB");
4233         if (mem)
4234         {
4235             ERR("mem not null for pbo -- unexpected\n");
4236             mem = NULL;
4237         }
4238     }
4239
4240     /* Save old pixel store pack state */
4241     gl_info->gl_ops.gl.p_glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4242     checkGLcall("glGetIntegerv");
4243     gl_info->gl_ops.gl.p_glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4244     checkGLcall("glGetIntegerv");
4245     gl_info->gl_ops.gl.p_glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4246     checkGLcall("glGetIntegerv");
4247
4248     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4249     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4250     checkGLcall("glPixelStorei");
4251     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4252     checkGLcall("glPixelStorei");
4253     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4254     checkGLcall("glPixelStorei");
4255
4256     gl_info->gl_ops.gl.p_glReadPixels(local_rect.left,
4257             !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4258             local_rect.right - local_rect.left,
4259             local_rect.bottom - local_rect.top,
4260             fmt, type, mem);
4261     checkGLcall("glReadPixels");
4262
4263     /* Reset previous pixel store pack state */
4264     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4265     checkGLcall("glPixelStorei");
4266     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4267     checkGLcall("glPixelStorei");
4268     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4269     checkGLcall("glPixelStorei");
4270
4271     if (surface->flags & SFLAG_PBO)
4272     {
4273         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4274         checkGLcall("glBindBufferARB");
4275
4276         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4277          * to get a pointer to it and perform the flipping in software. This is a lot
4278          * faster than calling glReadPixels for each line. In case we want more speed
4279          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4280         if (!srcIsUpsideDown)
4281         {
4282             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4283             checkGLcall("glBindBufferARB");
4284
4285             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4286             checkGLcall("glMapBufferARB");
4287         }
4288     }
4289
4290     /* TODO: Merge this with the palettization loop below for P8 targets */
4291     if(!srcIsUpsideDown) {
4292         UINT len, off;
4293         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4294             Flip the lines in software */
4295         len = (local_rect.right - local_rect.left) * bpp;
4296         off = local_rect.left * bpp;
4297
4298         row = HeapAlloc(GetProcessHeap(), 0, len);
4299         if(!row) {
4300             ERR("Out of memory\n");
4301             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4302                 HeapFree(GetProcessHeap(), 0, mem);
4303             return;
4304         }
4305
4306         top = mem + pitch * local_rect.top;
4307         bottom = mem + pitch * (local_rect.bottom - 1);
4308         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4309             memcpy(row, top + off, len);
4310             memcpy(top + off, bottom + off, len);
4311             memcpy(bottom + off, row, len);
4312             top += pitch;
4313             bottom -= pitch;
4314         }
4315         HeapFree(GetProcessHeap(), 0, row);
4316
4317         /* Unmap the temp PBO buffer */
4318         if (surface->flags & SFLAG_PBO)
4319         {
4320             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4321             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4322         }
4323     }
4324
4325     context_release(context);
4326
4327     /* For P8 textures we need to perform an inverse palette lookup. This is
4328      * done by searching for a palette index which matches the RGB value.
4329      * Note this isn't guaranteed to work when there are multiple entries for
4330      * the same color but we have no choice. In case of P8 render targets,
4331      * the index is stored in the alpha component so no conversion is needed. */
4332     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4333     {
4334         const PALETTEENTRY *pal = NULL;
4335         DWORD width = pitch / 3;
4336         int x, y, c;
4337
4338         if (surface->palette)
4339         {
4340             pal = surface->palette->palents;
4341         }
4342         else
4343         {
4344             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4345             HeapFree(GetProcessHeap(), 0, mem);
4346             return;
4347         }
4348
4349         for(y = local_rect.top; y < local_rect.bottom; y++) {
4350             for(x = local_rect.left; x < local_rect.right; x++) {
4351                 /*                      start              lines            pixels      */
4352                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4353                 const BYTE *green = blue  + 1;
4354                 const BYTE *red = green + 1;
4355
4356                 for(c = 0; c < 256; c++) {
4357                     if(*red   == pal[c].peRed   &&
4358                        *green == pal[c].peGreen &&
4359                        *blue  == pal[c].peBlue)
4360                     {
4361                         *((BYTE *) dest + y * width + x) = c;
4362                         break;
4363                     }
4364                 }
4365             }
4366         }
4367         HeapFree(GetProcessHeap(), 0, mem);
4368     }
4369 }
4370
4371 /* Read the framebuffer contents into a texture. Note that this function
4372  * doesn't do any kind of flipping. Using this on an onscreen surface will
4373  * result in a flipped D3D texture. */
4374 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4375 {
4376     struct wined3d_device *device = surface->resource.device;
4377     const struct wined3d_gl_info *gl_info;
4378     struct wined3d_context *context;
4379
4380     context = context_acquire(device, surface);
4381     gl_info = context->gl_info;
4382     device_invalidate_state(device, STATE_FRAMEBUFFER);
4383
4384     surface_prepare_texture(surface, context, srgb);
4385     surface_bind_and_dirtify(surface, context, srgb);
4386
4387     TRACE("Reading back offscreen render target %p.\n", surface);
4388
4389     if (surface_is_offscreen(surface))
4390         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
4391     else
4392         gl_info->gl_ops.gl.p_glReadBuffer(surface_get_gl_buffer(surface));
4393     checkGLcall("glReadBuffer");
4394
4395     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4396             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4397     checkGLcall("glCopyTexSubImage2D");
4398
4399     context_release(context);
4400 }
4401
4402 /* Context activation is done by the caller. */
4403 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4404         struct wined3d_context *context, BOOL srgb)
4405 {
4406     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4407     enum wined3d_conversion_type convert;
4408     struct wined3d_format format;
4409
4410     if (surface->flags & alloc_flag) return;
4411
4412     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4413     if (convert != WINED3D_CT_NONE || format.convert)
4414         surface->flags |= SFLAG_CONVERTED;
4415     else surface->flags &= ~SFLAG_CONVERTED;
4416
4417     surface_bind_and_dirtify(surface, context, srgb);
4418     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4419     surface->flags |= alloc_flag;
4420 }
4421
4422 /* Context activation is done by the caller. */
4423 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4424 {
4425     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4426     {
4427         struct wined3d_texture *texture = surface->container.u.texture;
4428         UINT sub_count = texture->level_count * texture->layer_count;
4429         UINT i;
4430
4431         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4432
4433         for (i = 0; i < sub_count; ++i)
4434         {
4435             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4436             surface_prepare_texture_internal(s, context, srgb);
4437         }
4438
4439         return;
4440     }
4441
4442     surface_prepare_texture_internal(surface, context, srgb);
4443 }
4444
4445 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4446 {
4447     if (multisample)
4448     {
4449         if (surface->rb_multisample)
4450             return;
4451
4452         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4453         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4454         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4455                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4456         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4457     }
4458     else
4459     {
4460         if (surface->rb_resolved)
4461             return;
4462
4463         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4464         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4465         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4466                 surface->pow2Width, surface->pow2Height);
4467         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4468     }
4469 }
4470
4471 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4472         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4473 {
4474     struct wined3d_device *device = surface->resource.device;
4475     UINT pitch = wined3d_surface_get_pitch(surface);
4476     const struct wined3d_gl_info *gl_info;
4477     struct wined3d_context *context;
4478     RECT local_rect;
4479     UINT w, h;
4480
4481     surface_get_rect(surface, rect, &local_rect);
4482
4483     mem += local_rect.top * pitch + local_rect.left * bpp;
4484     w = local_rect.right - local_rect.left;
4485     h = local_rect.bottom - local_rect.top;
4486
4487     /* Activate the correct context for the render target */
4488     context = context_acquire(device, surface);
4489     context_apply_blit_state(context, device);
4490     gl_info = context->gl_info;
4491
4492     if (!surface_is_offscreen(surface))
4493     {
4494         GLenum buffer = surface_get_gl_buffer(surface);
4495         TRACE("Unlocking %#x buffer.\n", buffer);
4496         context_set_draw_buffer(context, buffer);
4497
4498         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4499         gl_info->gl_ops.gl.p_glPixelZoom(1.0f, -1.0f);
4500     }
4501     else
4502     {
4503         /* Primary offscreen render target */
4504         TRACE("Offscreen render target.\n");
4505         context_set_draw_buffer(context, device->offscreenBuffer);
4506
4507         gl_info->gl_ops.gl.p_glPixelZoom(1.0f, 1.0f);
4508     }
4509
4510     gl_info->gl_ops.gl.p_glRasterPos3i(local_rect.left, local_rect.top, 1);
4511     checkGLcall("glRasterPos3i");
4512
4513     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4514     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4515
4516     if (surface->flags & SFLAG_PBO)
4517     {
4518         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4519         checkGLcall("glBindBufferARB");
4520     }
4521
4522     gl_info->gl_ops.gl.p_glDrawPixels(w, h, fmt, type, mem);
4523     checkGLcall("glDrawPixels");
4524
4525     if (surface->flags & SFLAG_PBO)
4526     {
4527         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4528         checkGLcall("glBindBufferARB");
4529     }
4530
4531     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4532     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4533
4534     if (wined3d_settings.strict_draw_ordering
4535             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4536             && surface->container.u.swapchain->front_buffer == surface))
4537         gl_info->gl_ops.gl.p_glFlush();
4538
4539     context_release(context);
4540 }
4541
4542 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4543 {
4544     /* FIXME: Is this really how color keys are supposed to work? I think it
4545      * makes more sense to compare the individual channels. */
4546     return color >= color_key->color_space_low_value
4547             && color <= color_key->color_space_high_value;
4548 }
4549
4550 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4551 {
4552     const struct wined3d_device *device = surface->resource.device;
4553     const struct wined3d_palette *pal = surface->palette;
4554     BOOL index_in_alpha = FALSE;
4555     unsigned int i;
4556
4557     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4558      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4559      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4560      * duplicate entries. Store the color key in the unused alpha component to speed the
4561      * download up and to make conversion unneeded. */
4562     index_in_alpha = primary_render_target_is_p8(device);
4563
4564     if (!pal)
4565     {
4566         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4567         if (index_in_alpha)
4568         {
4569             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4570              * there's no palette at this time. */
4571             for (i = 0; i < 256; i++) table[i][3] = i;
4572         }
4573     }
4574     else
4575     {
4576         TRACE("Using surface palette %p\n", pal);
4577         /* Get the surface's palette */
4578         for (i = 0; i < 256; ++i)
4579         {
4580             table[i][0] = pal->palents[i].peRed;
4581             table[i][1] = pal->palents[i].peGreen;
4582             table[i][2] = pal->palents[i].peBlue;
4583
4584             /* When index_in_alpha is set the palette index is stored in the
4585              * alpha component. In case of a readback we can then read
4586              * GL_ALPHA. Color keying is handled in BltOverride using a
4587              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4588              * color key itself is passed to glAlphaFunc in other cases the
4589              * alpha component of pixels that should be masked away is set to 0. */
4590             if (index_in_alpha)
4591                 table[i][3] = i;
4592             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4593                 table[i][3] = 0x00;
4594             else if (pal->flags & WINEDDPCAPS_ALPHA)
4595                 table[i][3] = pal->palents[i].peFlags;
4596             else
4597                 table[i][3] = 0xff;
4598         }
4599     }
4600 }
4601
4602 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width, UINT height,
4603         UINT outpitch, enum wined3d_conversion_type conversion_type, struct wined3d_surface *surface)
4604 {
4605     const BYTE *source;
4606     BYTE *dest;
4607
4608     TRACE("src %p, dst %p, pitch %u, width %u, height %u, outpitch %u, conversion_type %#x, surface %p.\n",
4609             src, dst, pitch, width, height, outpitch, conversion_type, surface);
4610
4611     switch (conversion_type)
4612     {
4613         case WINED3D_CT_NONE:
4614         {
4615             memcpy(dst, src, pitch * height);
4616             break;
4617         }
4618
4619         case WINED3D_CT_PALETTED:
4620         case WINED3D_CT_PALETTED_CK:
4621         {
4622             BYTE table[256][4];
4623             unsigned int x, y;
4624
4625             d3dfmt_p8_init_palette(surface, table, (conversion_type == WINED3D_CT_PALETTED_CK));
4626
4627             for (y = 0; y < height; y++)
4628             {
4629                 source = src + pitch * y;
4630                 dest = dst + outpitch * y;
4631                 /* This is an 1 bpp format, using the width here is fine */
4632                 for (x = 0; x < width; x++) {
4633                     BYTE color = *source++;
4634                     *dest++ = table[color][0];
4635                     *dest++ = table[color][1];
4636                     *dest++ = table[color][2];
4637                     *dest++ = table[color][3];
4638                 }
4639             }
4640         }
4641         break;
4642
4643         case WINED3D_CT_CK_565:
4644         {
4645             /* Converting the 565 format in 5551 packed to emulate color-keying.
4646
4647               Note : in all these conversion, it would be best to average the averaging
4648                       pixels to get the color of the pixel that will be color-keyed to
4649                       prevent 'color bleeding'. This will be done later on if ever it is
4650                       too visible.
4651
4652               Note2: Nvidia documents say that their driver does not support alpha + color keying
4653                      on the same surface and disables color keying in such a case
4654             */
4655             unsigned int x, y;
4656             const WORD *Source;
4657             WORD *Dest;
4658
4659             TRACE("Color keyed 565\n");
4660
4661             for (y = 0; y < height; y++) {
4662                 Source = (const WORD *)(src + y * pitch);
4663                 Dest = (WORD *) (dst + y * outpitch);
4664                 for (x = 0; x < width; x++ ) {
4665                     WORD color = *Source++;
4666                     *Dest = ((color & 0xffc0) | ((color & 0x1f) << 1));
4667                     if (!color_in_range(&surface->src_blt_color_key, color))
4668                         *Dest |= 0x0001;
4669                     Dest++;
4670                 }
4671             }
4672         }
4673         break;
4674
4675         case WINED3D_CT_CK_5551:
4676         {
4677             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4678             unsigned int x, y;
4679             const WORD *Source;
4680             WORD *Dest;
4681             TRACE("Color keyed 5551\n");
4682             for (y = 0; y < height; y++) {
4683                 Source = (const WORD *)(src + y * pitch);
4684                 Dest = (WORD *) (dst + y * outpitch);
4685                 for (x = 0; x < width; x++ ) {
4686                     WORD color = *Source++;
4687                     *Dest = color;
4688                     if (!color_in_range(&surface->src_blt_color_key, color))
4689                         *Dest |= (1 << 15);
4690                     else
4691                         *Dest &= ~(1 << 15);
4692                     Dest++;
4693                 }
4694             }
4695         }
4696         break;
4697
4698         case WINED3D_CT_CK_RGB24:
4699         {
4700             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4701             unsigned int x, y;
4702             for (y = 0; y < height; y++)
4703             {
4704                 source = src + pitch * y;
4705                 dest = dst + outpitch * y;
4706                 for (x = 0; x < width; x++) {
4707                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4708                     DWORD dstcolor = color << 8;
4709                     if (!color_in_range(&surface->src_blt_color_key, color))
4710                         dstcolor |= 0xff;
4711                     *(DWORD*)dest = dstcolor;
4712                     source += 3;
4713                     dest += 4;
4714                 }
4715             }
4716         }
4717         break;
4718
4719         case WINED3D_CT_RGB32_888:
4720         {
4721             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4722             unsigned int x, y;
4723             for (y = 0; y < height; y++)
4724             {
4725                 source = src + pitch * y;
4726                 dest = dst + outpitch * y;
4727                 for (x = 0; x < width; x++) {
4728                     DWORD color = 0xffffff & *(const DWORD*)source;
4729                     DWORD dstcolor = color << 8;
4730                     if (!color_in_range(&surface->src_blt_color_key, color))
4731                         dstcolor |= 0xff;
4732                     *(DWORD*)dest = dstcolor;
4733                     source += 4;
4734                     dest += 4;
4735                 }
4736             }
4737         }
4738         break;
4739
4740         case WINED3D_CT_CK_ARGB32:
4741         {
4742             unsigned int x, y;
4743             for (y = 0; y < height; ++y)
4744             {
4745                 source = src + pitch * y;
4746                 dest = dst + outpitch * y;
4747                 for (x = 0; x < width; ++x)
4748                 {
4749                     DWORD color = *(const DWORD *)source;
4750                     if (color_in_range(&surface->src_blt_color_key, color))
4751                         color &= ~0xff000000;
4752                     *(DWORD*)dest = color;
4753                     source += 4;
4754                     dest += 4;
4755                 }
4756             }
4757         }
4758         break;
4759
4760         default:
4761             ERR("Unsupported conversion type %#x.\n", conversion_type);
4762     }
4763     return WINED3D_OK;
4764 }
4765
4766 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4767 {
4768     /* Flip the surface contents */
4769     /* Flip the DC */
4770     {
4771         HDC tmp;
4772         tmp = front->hDC;
4773         front->hDC = back->hDC;
4774         back->hDC = tmp;
4775     }
4776
4777     /* Flip the DIBsection */
4778     {
4779         HBITMAP tmp = front->dib.DIBsection;
4780         front->dib.DIBsection = back->dib.DIBsection;
4781         back->dib.DIBsection = tmp;
4782     }
4783
4784     /* Flip the surface data */
4785     {
4786         void* tmp;
4787
4788         tmp = front->dib.bitmap_data;
4789         front->dib.bitmap_data = back->dib.bitmap_data;
4790         back->dib.bitmap_data = tmp;
4791
4792         tmp = front->resource.allocatedMemory;
4793         front->resource.allocatedMemory = back->resource.allocatedMemory;
4794         back->resource.allocatedMemory = tmp;
4795
4796         tmp = front->resource.heapMemory;
4797         front->resource.heapMemory = back->resource.heapMemory;
4798         back->resource.heapMemory = tmp;
4799     }
4800
4801     /* Flip the PBO */
4802     {
4803         GLuint tmp_pbo = front->pbo;
4804         front->pbo = back->pbo;
4805         back->pbo = tmp_pbo;
4806     }
4807
4808     /* Flip the opengl texture */
4809     {
4810         GLuint tmp;
4811
4812         tmp = back->texture_name;
4813         back->texture_name = front->texture_name;
4814         front->texture_name = tmp;
4815
4816         tmp = back->texture_name_srgb;
4817         back->texture_name_srgb = front->texture_name_srgb;
4818         front->texture_name_srgb = tmp;
4819
4820         tmp = back->rb_multisample;
4821         back->rb_multisample = front->rb_multisample;
4822         front->rb_multisample = tmp;
4823
4824         tmp = back->rb_resolved;
4825         back->rb_resolved = front->rb_resolved;
4826         front->rb_resolved = tmp;
4827
4828         resource_unload(&back->resource);
4829         resource_unload(&front->resource);
4830     }
4831
4832     {
4833         DWORD tmp_flags = back->flags;
4834         back->flags = front->flags;
4835         front->flags = tmp_flags;
4836     }
4837 }
4838
4839 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4840  * pixel copy calls. */
4841 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4842         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4843 {
4844     struct wined3d_device *device = dst_surface->resource.device;
4845     const struct wined3d_gl_info *gl_info;
4846     float xrel, yrel;
4847     struct wined3d_context *context;
4848     BOOL upsidedown = FALSE;
4849     RECT dst_rect = *dst_rect_in;
4850     GLenum dst_target;
4851
4852     if (dst_surface->container.type == WINED3D_CONTAINER_TEXTURE)
4853         dst_target = dst_surface->container.u.texture->target;
4854     else
4855         dst_target = dst_surface->texture_target;
4856
4857     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4858      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4859      */
4860     if(dst_rect.top > dst_rect.bottom) {
4861         UINT tmp = dst_rect.bottom;
4862         dst_rect.bottom = dst_rect.top;
4863         dst_rect.top = tmp;
4864         upsidedown = TRUE;
4865     }
4866
4867     context = context_acquire(device, src_surface);
4868     gl_info = context->gl_info;
4869     context_apply_blit_state(context, device);
4870     surface_internal_preload(dst_surface, SRGB_RGB);
4871
4872     /* Bind the target texture */
4873     context_bind_texture(context, dst_target, dst_surface->texture_name);
4874     if (surface_is_offscreen(src_surface))
4875     {
4876         TRACE("Reading from an offscreen target\n");
4877         upsidedown = !upsidedown;
4878         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
4879     }
4880     else
4881     {
4882         gl_info->gl_ops.gl.p_glReadBuffer(surface_get_gl_buffer(src_surface));
4883     }
4884     checkGLcall("glReadBuffer");
4885
4886     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4887     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4888
4889     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4890     {
4891         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4892
4893         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4894             ERR("Texture filtering not supported in direct blit.\n");
4895     }
4896     else if ((filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4897             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4898     {
4899         ERR("Texture filtering not supported in direct blit\n");
4900     }
4901
4902     if (upsidedown
4903             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4904             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4905     {
4906         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do. */
4907         gl_info->gl_ops.gl.p_glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4908                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4909                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4910                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4911     }
4912     else
4913     {
4914         LONG row;
4915         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4916         /* I have to process this row by row to swap the image,
4917          * otherwise it would be upside down, so stretching in y direction
4918          * doesn't cost extra time
4919          *
4920          * However, stretching in x direction can be avoided if not necessary
4921          */
4922         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4923             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4924             {
4925                 /* Well, that stuff works, but it's very slow.
4926                  * find a better way instead
4927                  */
4928                 LONG col;
4929
4930                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4931                 {
4932                     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4933                             dst_rect.left + col /* x offset */, row /* y offset */,
4934                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4935                 }
4936             }
4937             else
4938             {
4939                 gl_info->gl_ops.gl.p_glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4940                         dst_rect.left /* x offset */, row /* y offset */,
4941                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4942             }
4943         }
4944     }
4945     checkGLcall("glCopyTexSubImage2D");
4946
4947     context_release(context);
4948
4949     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4950      * path is never entered
4951      */
4952     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4953 }
4954
4955 /* Uses the hardware to stretch and flip the image.
      *
      * Outline of what the code below does:
      *   1. The contents of the source read buffer are copied into a texture
      *      (the source surface's own texture, or a temporary one).
      *   2. That texture is drawn as a quad of the destination size at the
      *      origin of a scratch draw buffer (GL_AUX1, GL_AUX0 or GL_BACK).
      *   3. The quad is read back into dst_surface's texture with
      *      glCopyTexSubImage2D().
      *   4. If GL_BACK served as scratch buffer, the texture captured in step 1
      *      is drawn over it again.
      *
      * dst_surface  Surface whose texture receives the result; it is preloaded
      *              here and its location is set to SFLAG_INTEXTURE at the end.
      * src_surface  Surface to read from; the GL context is acquired for it.
      * src_rect     Source rectangle, used to derive the texture coordinates.
      * dst_rect_in  Destination rectangle. top > bottom is accepted: the two are
      *              swapped and the blit is flagged as upside down.
      * filter       Texture filter applied to the texture captured in step 1. */
4956 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4957         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4958 {
4959     struct wined3d_device *device = dst_surface->resource.device;
4960     struct wined3d_swapchain *src_swapchain = NULL;
4961     GLuint src, backup = 0;
4962     float left, right, top, bottom; /* Texture coordinates */
4963     UINT fbwidth = src_surface->resource.width;
4964     UINT fbheight = src_surface->resource.height;
4965     const struct wined3d_gl_info *gl_info;
4966     struct wined3d_context *context;
4967     GLenum drawBuffer = GL_BACK;
4968     GLenum texture_target;
4969     BOOL noBackBufferBackup;
4970     BOOL src_offscreen;
4971     BOOL upsidedown = FALSE;
4972     RECT dst_rect = *dst_rect_in;
4973
4974     TRACE("Using hwstretch blit\n");
4975     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4976     context = context_acquire(device, src_surface);
4977     gl_info = context->gl_info;
4978     context_apply_blit_state(context, device);
4979     surface_internal_preload(dst_surface, SRGB_RGB);
4980
4981     src_offscreen = surface_is_offscreen(src_surface);
         /* With an offscreen source and FBO offscreen rendering, a temporary
          * texture is used below instead of the source surface's own texture. */
4982     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4983     if (!noBackBufferBackup && !src_surface->texture_name)
4984     {
4985         /* Get it a description */
4986         surface_internal_preload(src_surface, SRGB_RGB);
4987     }
4988
4989     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4990      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4991      */
4992     if (context->aux_buffers >= 2)
4993     {
4994         /* Got more than one aux buffer? Use the 2nd aux buffer */
4995         drawBuffer = GL_AUX1;
4996     }
4997     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4998     {
4999         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
5000         drawBuffer = GL_AUX0;
5001     }
5002
5003     if (noBackBufferBackup)
5004     {
             /* NOTE(review): no glTexImage2D() call for "backup" is visible in
              * this function before the glCopyTexSubImage2D() further down
              * writes into it - confirm that its storage is defined. */
5005         gl_info->gl_ops.gl.p_glGenTextures(1, &backup);
5006         checkGLcall("glGenTextures");
5007         context_bind_texture(context, GL_TEXTURE_2D, backup);
5008         texture_target = GL_TEXTURE_2D;
5009     }
5010     else
5011     {
5012         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5013          * we are reading from the back buffer, the backup can be used as source texture
5014          */
5015         texture_target = src_surface->texture_target;
5016         context_bind_texture(context, texture_target, src_surface->texture_name);
5017         gl_info->gl_ops.gl.p_glEnable(texture_target);
5018         checkGLcall("glEnable(texture_target)");
5019
5020         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5021         src_surface->flags &= ~SFLAG_INTEXTURE;
5022     }
5023
5024     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5025      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5026      */
5027     if(dst_rect.top > dst_rect.bottom) {
5028         UINT tmp = dst_rect.bottom;
5029         dst_rect.bottom = dst_rect.top;
5030         dst_rect.top = tmp;
5031         upsidedown = TRUE;
5032     }
5033
5034     if (src_offscreen)
5035     {
5036         TRACE("Reading from an offscreen target\n");
5037         upsidedown = !upsidedown;
5038         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
5039     }
5040     else
5041     {
5042         gl_info->gl_ops.gl.p_glReadBuffer(surface_get_gl_buffer(src_surface));
5043     }
5044
         /* Capture the whole read buffer in the texture bound above (either
          * "backup" or the source surface's texture). */
5045     /* TODO: Only back up the part that will be overwritten */
5046     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(texture_target, 0, 0, 0, 0, 0, fbwidth, fbheight);
5047
5048     checkGLcall("glCopyTexSubImage2D");
5049
5050     /* No issue with overriding these - the sampler is dirty due to blit usage */
5051     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5052             wined3d_gl_mag_filter(magLookup, filter));
5053     checkGLcall("glTexParameteri");
5054     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5055             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
5056     checkGLcall("glTexParameteri");
5057
5058     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5059         src_swapchain = src_surface->container.u.swapchain;
         /* If the source is not part of a swapchain, or is the swapchain's first
          * back buffer, the texture captured above is also the source texture. */
5060     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5061     {
5062         src = backup ? backup : src_surface->texture_name;
5063     }
5064     else
5065     {
             /* Otherwise GL_FRONT is read into a new temporary GL_TEXTURE_2D
              * texture, which is deleted again in the cleanup at the end. */
5066         gl_info->gl_ops.gl.p_glReadBuffer(GL_FRONT);
5067         checkGLcall("glReadBuffer(GL_FRONT)");
5068
5069         gl_info->gl_ops.gl.p_glGenTextures(1, &src);
5070         checkGLcall("glGenTextures(1, &src)");
5071         context_bind_texture(context, GL_TEXTURE_2D, src);
5072
5073         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5074          * out for power of 2 sizes
5075          */
5076         gl_info->gl_ops.gl.p_glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5077                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5078         checkGLcall("glTexImage2D");
5079         gl_info->gl_ops.gl.p_glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, fbwidth, fbheight);
5080
5081         gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5082         checkGLcall("glTexParameteri");
5083         gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5084         checkGLcall("glTexParameteri");
5085
5086         gl_info->gl_ops.gl.p_glReadBuffer(GL_BACK);
5087         checkGLcall("glReadBuffer(GL_BACK)");
5088
5089         if (texture_target != GL_TEXTURE_2D)
5090         {
5091             gl_info->gl_ops.gl.p_glDisable(texture_target);
5092             gl_info->gl_ops.gl.p_glEnable(GL_TEXTURE_2D);
5093             texture_target = GL_TEXTURE_2D;
5094         }
5095     }
         /* NOTE(review): the label below mentions glEnd, but no glBegin/glEnd
          * pair has been issued in this function up to this point. */
5096     checkGLcall("glEnd and previous");
5097
5098     left = src_rect->left;
5099     right = src_rect->right;
5100
         /* The y texture coordinates are measured from the opposite edge of the
          * surface (height - y); "upsidedown" exchanges top and bottom. */
5101     if (!upsidedown)
5102     {
5103         top = src_surface->resource.height - src_rect->top;
5104         bottom = src_surface->resource.height - src_rect->bottom;
5105     }
5106     else
5107     {
5108         top = src_surface->resource.height - src_rect->bottom;
5109         bottom = src_surface->resource.height - src_rect->top;
5110     }
5111
         /* With SFLAG_NORMCOORD set, the coordinates are divided by the pow2
          * texture size. */
5112     if (src_surface->flags & SFLAG_NORMCOORD)
5113     {
5114         left /= src_surface->pow2Width;
5115         right /= src_surface->pow2Width;
5116         top /= src_surface->pow2Height;
5117         bottom /= src_surface->pow2Height;
5118     }
5119
5120     /* draw the source texture stretched and upside down. The correct surface is bound already */
5121     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5122     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5123
         /* Draw into the scratch buffer and read back from the same buffer. */
5124     context_set_draw_buffer(context, drawBuffer);
5125     gl_info->gl_ops.gl.p_glReadBuffer(drawBuffer);
5126
5127     gl_info->gl_ops.gl.p_glBegin(GL_QUADS);
5128         /* bottom left */
5129         gl_info->gl_ops.gl.p_glTexCoord2f(left, bottom);
5130         gl_info->gl_ops.gl.p_glVertex2i(0, 0);
5131
5132         /* top left */
5133         gl_info->gl_ops.gl.p_glTexCoord2f(left, top);
5134         gl_info->gl_ops.gl.p_glVertex2i(0, dst_rect.bottom - dst_rect.top);
5135
5136         /* top right */
5137         gl_info->gl_ops.gl.p_glTexCoord2f(right, top);
5138         gl_info->gl_ops.gl.p_glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5139
5140         /* bottom right */
5141         gl_info->gl_ops.gl.p_glTexCoord2f(right, bottom);
5142         gl_info->gl_ops.gl.p_glVertex2i(dst_rect.right - dst_rect.left, 0);
5143     gl_info->gl_ops.gl.p_glEnd();
5144     checkGLcall("glEnd and previous");
5145
5146     if (texture_target != dst_surface->texture_target)
5147     {
5148         gl_info->gl_ops.gl.p_glDisable(texture_target);
5149         gl_info->gl_ops.gl.p_glEnable(dst_surface->texture_target);
5150         texture_target = dst_surface->texture_target;
5151     }
5152
5153     /* Now read the stretched and upside down image into the destination texture */
5154     context_bind_texture(context, texture_target, dst_surface->texture_name);
5155     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(texture_target,
5156                         0,
5157                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5158                         0, 0, /* We blitted the image to the origin */
5159                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5160     checkGLcall("glCopyTexSubImage2D");
5161
         /* Only GL_BACK needs restoring; the aux buffers are left as they are. */
5162     if (drawBuffer == GL_BACK)
5163     {
5164         /* Write the back buffer backup back. */
5165         if (backup)
5166         {
5167             if (texture_target != GL_TEXTURE_2D)
5168             {
5169                 gl_info->gl_ops.gl.p_glDisable(texture_target);
5170                 gl_info->gl_ops.gl.p_glEnable(GL_TEXTURE_2D);
5171                 texture_target = GL_TEXTURE_2D;
5172             }
5173             context_bind_texture(context, GL_TEXTURE_2D, backup);
5174         }
5175         else
5176         {
5177             if (texture_target != src_surface->texture_target)
5178             {
5179                 gl_info->gl_ops.gl.p_glDisable(texture_target);
5180                 gl_info->gl_ops.gl.p_glEnable(src_surface->texture_target);
5181                 texture_target = src_surface->texture_target;
5182             }
5183             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5184         }
5185
             /* Full-size quad covering fbwidth x fbheight; texture coordinates
              * are expressed as fractions of the pow2 texture size. */
5186         gl_info->gl_ops.gl.p_glBegin(GL_QUADS);
5187             /* top left */
5188             gl_info->gl_ops.gl.p_glTexCoord2f(0.0f, 0.0f);
5189             gl_info->gl_ops.gl.p_glVertex2i(0, fbheight);
5190
5191             /* bottom left */
5192             gl_info->gl_ops.gl.p_glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5193             gl_info->gl_ops.gl.p_glVertex2i(0, 0);
5194
5195             /* bottom right */
5196             gl_info->gl_ops.gl.p_glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5197                     (float)fbheight / (float)src_surface->pow2Height);
5198             gl_info->gl_ops.gl.p_glVertex2i(fbwidth, 0);
5199
5200             /* top right */
5201             gl_info->gl_ops.gl.p_glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5202             gl_info->gl_ops.gl.p_glVertex2i(fbwidth, fbheight);
5203         gl_info->gl_ops.gl.p_glEnd();
5204     }
5205     gl_info->gl_ops.gl.p_glDisable(texture_target);
5206     checkGLcall("glDisable(texture_target)");
5207
5208     /* Cleanup */
         /* "src" is only deleted when it is the temporary texture generated for
          * the GL_FRONT read above. */
5209     if (src != src_surface->texture_name && src != backup)
5210     {
5211         gl_info->gl_ops.gl.p_glDeleteTextures(1, &src);
5212         checkGLcall("glDeleteTextures(1, &src)");
5213     }
5214     if (backup)
5215     {
5216         gl_info->gl_ops.gl.p_glDeleteTextures(1, &backup);
5217         checkGLcall("glDeleteTextures(1, &backup)");
5218     }
5219
5220     if (wined3d_settings.strict_draw_ordering)
5221         gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5222
5223     context_release(context);
5224
5225     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5226      * path is never entered
5227      */
5228     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5229 }
5230
5231 /* Front buffer coordinates are always full screen coordinates, but our GL
5232  * drawable is limited to the window's client area. The sysmem and texture
5233  * copies do have the full screen size. Note that GL has a bottom-left
5234  * origin, while D3D has a top-left origin. */
5235 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5236 {
5237     UINT drawable_height;
5238
5239     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5240             && surface == surface->container.u.swapchain->front_buffer)
5241     {
5242         POINT offset = {0, 0};
5243         RECT windowsize;
5244
5245         ScreenToClient(window, &offset);
5246         OffsetRect(rect, offset.x, offset.y);
5247
5248         GetClientRect(window, &windowsize);
5249         drawable_height = windowsize.bottom - windowsize.top;
5250     }
5251     else
5252     {
5253         drawable_height = surface->resource.height;
5254     }
5255
5256     rect->top = drawable_height - rect->top;
5257     rect->bottom = drawable_height - rect->bottom;
5258 }
5259
5260 static void surface_blt_to_drawable(const struct wined3d_device *device,
5261         enum wined3d_texture_filter_type filter, BOOL color_key,
5262         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5263         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5264 {
5265     const struct wined3d_gl_info *gl_info;
5266     struct wined3d_context *context;
5267     RECT src_rect, dst_rect;
5268
5269     src_rect = *src_rect_in;
5270     dst_rect = *dst_rect_in;
5271
5272     /* Make sure the surface is up-to-date. This should probably use
5273      * surface_load_location() and worry about the destination surface too,
5274      * unless we're overwriting it completely. */
5275     surface_internal_preload(src_surface, SRGB_RGB);
5276
5277     /* Activate the destination context, set it up for blitting */
5278     context = context_acquire(device, dst_surface);
5279     gl_info = context->gl_info;
5280     context_apply_blit_state(context, device);
5281
5282     if (!surface_is_offscreen(dst_surface))
5283         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5284
5285     device->blitter->set_shader(device->blit_priv, context, src_surface);
5286
5287     if (color_key)
5288     {
5289         gl_info->gl_ops.gl.p_glEnable(GL_ALPHA_TEST);
5290         checkGLcall("glEnable(GL_ALPHA_TEST)");
5291
5292         /* When the primary render target uses P8, the alpha component
5293          * contains the palette index. Which means that the colorkey is one of
5294          * the palette entries. In other cases pixels that should be masked
5295          * away have alpha set to 0. */
5296         if (primary_render_target_is_p8(device))
5297             gl_info->gl_ops.gl.p_glAlphaFunc(GL_NOTEQUAL,
5298                     (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5299         else
5300             gl_info->gl_ops.gl.p_glAlphaFunc(GL_NOTEQUAL, 0.0f);
5301         checkGLcall("glAlphaFunc");
5302     }
5303     else
5304     {
5305         gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5306         checkGLcall("glDisable(GL_ALPHA_TEST)");
5307     }
5308
5309     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5310
5311     if (color_key)
5312     {
5313         gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5314         checkGLcall("glDisable(GL_ALPHA_TEST)");
5315     }
5316
5317     /* Leave the opengl state valid for blitting */
5318     device->blitter->unset_shader(context->gl_info);
5319
5320     if (wined3d_settings.strict_draw_ordering
5321             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5322             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5323         gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5324
5325     context_release(context);
5326 }
5327
5328 /* Do not call while under the GL lock. */
5329 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5330 {
5331     struct wined3d_device *device = s->resource.device;
5332     const struct blit_shader *blitter;
5333
5334     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5335             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5336     if (!blitter)
5337     {
5338         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5339         return WINED3DERR_INVALIDCALL;
5340     }
5341
5342     return blitter->color_fill(device, s, rect, color);
5343 }
5344
5345 /* Do not call while under the GL lock. */
5346 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5347         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5348         enum wined3d_texture_filter_type filter)
5349 {
5350     struct wined3d_device *device = dst_surface->resource.device;
5351     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5352     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5353
5354     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5355             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5356             flags, DDBltFx, debug_d3dtexturefiltertype(filter));
5357
5358     /* Get the swapchain. One of the surfaces has to be a primary surface */
5359     if (dst_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5360     {
5361         WARN("Destination is in sysmem, rejecting gl blt\n");
5362         return WINED3DERR_INVALIDCALL;
5363     }
5364
5365     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5366         dstSwapchain = dst_surface->container.u.swapchain;
5367
5368     if (src_surface)
5369     {
5370         if (src_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5371         {
5372             WARN("Src is in sysmem, rejecting gl blt\n");
5373             return WINED3DERR_INVALIDCALL;
5374         }
5375
5376         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5377             srcSwapchain = src_surface->container.u.swapchain;
5378     }
5379
5380     /* Early sort out of cases where no render target is used */
5381     if (!dstSwapchain && !srcSwapchain
5382             && src_surface != device->fb.render_targets[0]
5383             && dst_surface != device->fb.render_targets[0])
5384     {
5385         TRACE("No surface is render target, not using hardware blit.\n");
5386         return WINED3DERR_INVALIDCALL;
5387     }
5388
5389     /* No destination color keying supported */
5390     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5391     {
5392         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5393         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5394         return WINED3DERR_INVALIDCALL;
5395     }
5396
5397     if (dstSwapchain && dstSwapchain == srcSwapchain)
5398     {
5399         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5400         return WINED3DERR_INVALIDCALL;
5401     }
5402
5403     if (dstSwapchain && srcSwapchain)
5404     {
5405         FIXME("Implement hardware blit between two different swapchains\n");
5406         return WINED3DERR_INVALIDCALL;
5407     }
5408
5409     if (dstSwapchain)
5410     {
5411         /* Handled with regular texture -> swapchain blit */
5412         if (src_surface == device->fb.render_targets[0])
5413             TRACE("Blit from active render target to a swapchain\n");
5414     }
5415     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5416     {
5417         FIXME("Implement blit from a swapchain to the active render target\n");
5418         return WINED3DERR_INVALIDCALL;
5419     }
5420
5421     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5422     {
5423         /* Blit from render target to texture */
5424         BOOL stretchx;
5425
5426         /* P8 read back is not implemented */
5427         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5428                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5429         {
5430             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5431             return WINED3DERR_INVALIDCALL;
5432         }
5433
5434         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5435         {
5436             TRACE("Color keying not supported by frame buffer to texture blit\n");
5437             return WINED3DERR_INVALIDCALL;
5438             /* Destination color key is checked above */
5439         }
5440
5441         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5442             stretchx = TRUE;
5443         else
5444             stretchx = FALSE;
5445
5446         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage2D can
5447          * neither flip the image nor scale it.
5448          *
5449          * -> If the app asks for an unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5450          * -> If the app wants an image with an unscaled width, copy it line per line
5451          * -> If the app wants an image that is scaled on the x axis, and the destination rectangle is smaller
5452          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5453          *    back buffer. This is slower than reading line per line, thus not used for flipping
5454          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5455          *    pixel by pixel. */
5456         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5457                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5458         {
5459             TRACE("No stretching in x direction, using direct framebuffer -> texture copy.\n");
5460             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, filter);
5461         }
5462         else
5463         {
5464             TRACE("Using hardware stretching to flip / stretch the texture.\n");
5465             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, filter);
5466         }
5467
5468         if (!dst_surface->resource.map_count && !(dst_surface->flags & SFLAG_DONOTFREE))
5469         {
5470             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5471             dst_surface->resource.allocatedMemory = NULL;
5472             dst_surface->resource.heapMemory = NULL;
5473         }
5474         else
5475         {
5476             dst_surface->flags &= ~SFLAG_INSYSMEM;
5477         }
5478
5479         return WINED3D_OK;
5480     }
5481     else if (src_surface)
5482     {
5483         /* Blit from offscreen surface to render target */
5484         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5485         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5486
5487         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5488
5489         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5490                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5491                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5492         {
5493             FIXME("Unsupported blit operation falling back to software\n");
5494             return WINED3DERR_INVALIDCALL;
5495         }
5496
5497         /* Color keying: Check if we have to do a color keyed blt,
5498          * and if not check if a color key is activated.
5499          *
5500          * Just modify the color keying parameters in the surface and restore them afterwards
5501          * The surface keeps track of the color key last used to load the opengl surface.
5502          * PreLoad will catch the change to the flags and color key and reload if necessary.
5503          */
5504         if (flags & WINEDDBLT_KEYSRC)
5505         {
5506             /* Use color key from surface */
5507         }
5508         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5509         {
5510             /* Use color key from DDBltFx */
5511             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5512             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5513         }
5514         else
5515         {
5516             /* Do not use color key */
5517             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5518         }
5519
5520         surface_blt_to_drawable(device, filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5521                 src_surface, src_rect, dst_surface, dst_rect);
5522
5523         /* Restore the color key parameters */
5524         src_surface->CKeyFlags = oldCKeyFlags;
5525         src_surface->src_blt_color_key = old_blt_key;
5526
5527         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5528
5529         return WINED3D_OK;
5530     }
5531
5532     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5533     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5534     return WINED3DERR_INVALIDCALL;
5535 }
5536
5537 /* Write depth values taken from a texture into the depth buffer of whatever
      * framebuffer is bound when this function is called.
      *
      * GL state is set up so that only depth gets written: colour writes are
      * masked off, the depth test always passes, depth writes are enabled, and
      * culling, blending and the alpha, scissor and stencil tests are disabled.
      * The viewport is set to (x, y, w, h) and a triangle strip spanning -1..1 on
      * both axes is drawn while "texture" is bound and the shader backend's
      * depth blt setup is selected. The attribute groups pushed at the start,
      * the previous texture binding and the texture's compare mode are restored
      * before returning.
      *
      * "target" is passed to surface_get_blt_info() to pick the bind target and
      * texture coordinates.
      *
      * Context activation is done by the caller. */
5538 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5539         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5540 {
5541     struct wined3d_device *device = surface->resource.device;
5542     const struct wined3d_gl_info *gl_info = context->gl_info;
5543     GLint compare_mode = GL_NONE;
5544     struct blt_info info;
5545     GLint old_binding = 0;
5546     RECT rect;
5547
         /* Everything changed below up to glDepthRange() belongs to one of these
          * attribute groups and is undone by the glPopAttrib() at the end. */
5548     gl_info->gl_ops.gl.p_glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5549
5550     gl_info->gl_ops.gl.p_glDisable(GL_CULL_FACE);
5551     gl_info->gl_ops.gl.p_glDisable(GL_BLEND);
5552     gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5553     gl_info->gl_ops.gl.p_glDisable(GL_SCISSOR_TEST);
5554     gl_info->gl_ops.gl.p_glDisable(GL_STENCIL_TEST);
         /* Depth test enabled but always passing, with depth writes on and colour
          * writes off: every fragment of the quad updates depth and nothing else. */
5555     gl_info->gl_ops.gl.p_glEnable(GL_DEPTH_TEST);
5556     gl_info->gl_ops.gl.p_glDepthFunc(GL_ALWAYS);
5557     gl_info->gl_ops.gl.p_glDepthMask(GL_TRUE);
5558     gl_info->gl_ops.gl.p_glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5559     gl_info->gl_ops.gl.p_glViewport(x, y, w, h);
5560     gl_info->gl_ops.gl.p_glDepthRange(0.0, 1.0);
5561
         /* SetRect() takes left, top, right, bottom, so top (h) and bottom (0) are
          * swapped here. Presumably this makes surface_get_blt_info() hand back
          * vertically flipped texture coordinates - TODO confirm against that
          * helper. */
5562     SetRect(&rect, 0, h, w, 0);
5563     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5564     context_active_texture(context, context->gl_info, 0);
         /* Remember what was bound to this target so it can be put back below. */
5565     gl_info->gl_ops.gl.p_glGetIntegerv(info.binding, &old_binding);
5566     gl_info->gl_ops.gl.p_glBindTexture(info.bind_target, texture);
5567     if (gl_info->supported[ARB_SHADOW])
5568     {
             /* Turn the texture's compare mode off for the duration of the draw
              * if it was enabled; compare_mode keeps the old value for the
              * restore after glEnd(). */
5569         gl_info->gl_ops.gl.p_glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5570         if (compare_mode != GL_NONE)
5571             gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5572     }
5573
5574     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5575             gl_info, info.tex_type, &surface->ds_current_size);
5576
         /* Four vertices at the corners of the -1..1 square, each paired with one
          * of the texture coordinates computed by surface_get_blt_info(). */
5577     gl_info->gl_ops.gl.p_glBegin(GL_TRIANGLE_STRIP);
5578     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[0]);
5579     gl_info->gl_ops.gl.p_glVertex2f(-1.0f, -1.0f);
5580     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[1]);
5581     gl_info->gl_ops.gl.p_glVertex2f(1.0f, -1.0f);
5582     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[2]);
5583     gl_info->gl_ops.gl.p_glVertex2f(-1.0f, 1.0f);
5584     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[3]);
5585     gl_info->gl_ops.gl.p_glVertex2f(1.0f, 1.0f);
5586     gl_info->gl_ops.gl.p_glEnd();
5587
         /* compare_mode is only different from GL_NONE if it was read and reset
          * above, so this restores exactly what was changed. */
5588     if (compare_mode != GL_NONE)
5589         gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5590     gl_info->gl_ops.gl.p_glBindTexture(info.bind_target, old_binding);
5591
5592     gl_info->gl_ops.gl.p_glPopAttrib();
5593
5594     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5595 }
5596 /* Record which location holds the current depth/stencil contents of a surface.
      *
      * "location" replaces all previously set SFLAG_LOCATIONS / SFLAG_DISCARDED
      * bits in surface->flags, and (w, h) is stored as the size of the area that
      * is current. If the change toggles SFLAG_INTEXTURE and the surface belongs
      * to a texture, that texture is marked dirty. Bits outside
      * SFLAG_LOCATIONS | SFLAG_DISCARDED only produce a FIXME; they are still
      * applied. */
5597 void surface_modify_ds_location(struct wined3d_surface *surface,
5598         DWORD location, UINT w, UINT h)
5599 {
5600     const DWORD location_mask = SFLAG_LOCATIONS | SFLAG_DISCARDED;
5601     BOOL texture_was_current = !!(surface->flags & SFLAG_INTEXTURE);
5602     BOOL texture_is_current = !!(location & SFLAG_INTEXTURE);
5603
5604     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5605
5606     if (location & ~location_mask)
5607         FIXME("Invalid location (%#x) specified.\n", location);
5608
         /* The container only needs to hear about it when the texture location
          * changes state in either direction. */
5609     if (texture_was_current != texture_is_current
5610             && surface->container.type == WINED3D_CONTAINER_TEXTURE)
5611     {
5612         TRACE("Passing to container.\n");
5613         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5614     }
5615
5616     surface->ds_current_size.cx = w;
5617     surface->ds_current_size.cy = h;
5618     surface->flags = (surface->flags & ~location_mask) | location;
5619 }
5620
5621 /* Make "location" an up to date location of a depth/stencil surface.
      *
      * Nothing is done unless offscreen rendering uses FBOs. Otherwise:
      *  - if the surface is marked SFLAG_DISCARDED, only the storage for the
      *    requested location is prepared and no data is copied;
      *  - if no location is current at all, a FIXME is printed and the location
      *    is simply marked current;
      *  - for SFLAG_INTEXTURE the onscreen depth buffer is copied into
      *    device->depth_blt_texture and then depth-blitted into the surface's
      *    texture;
      *  - for SFLAG_INDRAWABLE the surface's texture is depth-blitted into the
      *    onscreen depth buffer.
      * On every path that does not return early, "location" is added to
      * surface->flags and ds_current_size is set to the full surface size.
      *
      * Context activation is done by the caller. */
5622 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5623 {
5624     const struct wined3d_gl_info *gl_info = context->gl_info;
5625     struct wined3d_device *device = surface->resource.device;
5626     GLsizei w, h;
5627
5628     TRACE("surface %p, new location %#x.\n", surface, location);
5629
5630     /* TODO: Make this work for modes other than FBO */
5631     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5632
         /* If the requested location is not current, the area to copy is the
          * area that was recorded as current elsewhere, and the recorded size
          * is reset so the "already up to date" check below cannot trigger.
          * If the location is flagged current, w and h cover the whole surface
          * and the check below decides whether the recorded size agrees. */
5633     if (!(surface->flags & location))
5634     {
5635         w = surface->ds_current_size.cx;
5636         h = surface->ds_current_size.cy;
5637         surface->ds_current_size.cx = 0;
5638         surface->ds_current_size.cy = 0;
5639     }
5640     else
5641     {
5642         w = surface->resource.width;
5643         h = surface->resource.height;
5644     }
5645
5646     if (surface->ds_current_size.cx == surface->resource.width
5647             && surface->ds_current_size.cy == surface->resource.height)
5648     {
5649         TRACE("Location (%#x) is already up to date.\n", location);
5650         return;
5651     }
5652
5653     if (surface->current_renderbuffer)
5654     {
5655         FIXME("Not supported with fixed up depth stencil.\n");
5656         return;
5657     }
5658
5659     if (surface->flags & SFLAG_DISCARDED)
5660     {
5661         TRACE("Surface was discarded, no need copy data.\n");
             /* Only make sure the destination storage exists; its contents are
              * not filled in. */
5662         switch (location)
5663         {
5664             case SFLAG_INTEXTURE:
5665                 surface_prepare_texture(surface, context, FALSE);
5666                 break;
5667             case SFLAG_INRB_MULTISAMPLE:
5668                 surface_prepare_rb(surface, gl_info, TRUE);
5669                 break;
5670             case SFLAG_INDRAWABLE:
5671                 /* Nothing to do */
5672                 break;
5673             default:
5674                 FIXME("Unhandled location %#x\n", location);
5675         }
5676         surface->flags &= ~SFLAG_DISCARDED;
5677         surface->flags |= location;
5678         surface->ds_current_size.cx = surface->resource.width;
5679         surface->ds_current_size.cy = surface->resource.height;
5680         return;
5681     }
5682
5683     if (!(surface->flags & SFLAG_LOCATIONS))
5684     {
             /* There is nothing to copy from; the location is marked current
              * anyway. */
5685         FIXME("No up to date depth stencil location.\n");
5686         surface->flags |= location;
5687         surface->ds_current_size.cx = surface->resource.width;
5688         surface->ds_current_size.cy = surface->resource.height;
5689         return;
5690     }
5691
5692     if (location == SFLAG_INTEXTURE)
5693     {
5694         GLint old_binding = 0;
5695         GLenum bind_target;
5696
5697         /* The render target is allowed to be smaller than the depth/stencil
5698          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5699          * than the offscreen surface. Don't overwrite the offscreen surface
5700          * with undefined data. */
5701         w = min(w, context->swapchain->desc.backbuffer_width);
5702         h = min(h, context->swapchain->desc.backbuffer_height);
5703
5704         TRACE("Copying onscreen depth buffer to depth texture.\n");
5705
             /* The intermediate texture is created on first use and kept on the
              * device. */
5706         if (!device->depth_blt_texture)
5707             gl_info->gl_ops.gl.p_glGenTextures(1, &device->depth_blt_texture);
5708
5709         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5710          * directly on the FBO texture. That's because we need to flip. */
5711         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5712                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
             /* The intermediate texture uses the same kind of target as the
              * surface's own texture; the current binding of that target is
              * saved so it can be restored after the copy. */
5713         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5714         {
5715             gl_info->gl_ops.gl.p_glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5716             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5717         }
5718         else
5719         {
5720             gl_info->gl_ops.gl.p_glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5721             bind_target = GL_TEXTURE_2D;
5722         }
5723         gl_info->gl_ops.gl.p_glBindTexture(bind_target, device->depth_blt_texture);
5724         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5725          * internal format, because the internal format might include stencil
5726          * data. In principle we should copy stencil data as well, but unless
5727          * the driver supports stencil export it's hard to do, and doesn't
5728          * seem to be needed in practice. If the hardware doesn't support
5729          * writing stencil data, the glCopyTexImage2D() call might trigger
5730          * software fallbacks. */
5731         gl_info->gl_ops.gl.p_glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5732         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5733         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5734         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5735         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5736         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5737         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5738         gl_info->gl_ops.gl.p_glBindTexture(bind_target, old_binding);
5739
             /* Switch to an FBO that has the surface's texture as its depth
              * attachment and no colour attachment, with no draw buffer. */
5740         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5741                 NULL, surface, SFLAG_INTEXTURE);
5742         context_set_draw_buffer(context, GL_NONE);
5743
5744         /* Do the actual blit */
5745         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5746         checkGLcall("depth_blt");
5747
             /* The FBO binding was changed behind the state tracker's back. */
5748         context_invalidate_state(context, STATE_FRAMEBUFFER);
5749
5750         if (wined3d_settings.strict_draw_ordering)
5751             gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5752     }
5753     else if (location == SFLAG_INDRAWABLE)
5754     {
5755         TRACE("Copying depth texture to onscreen depth buffer.\n");
5756
5757         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5758                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
             /* The viewport's y origin is offset by the texture's pow2 height
              * minus the height of the area being copied. */
5759         surface_depth_blt(surface, context, surface->texture_name,
5760                 0, surface->pow2Height - h, w, h, surface->texture_target);
5761         checkGLcall("depth_blt");
5762
5763         context_invalidate_state(context, STATE_FRAMEBUFFER);
5764
5765         if (wined3d_settings.strict_draw_ordering)
5766             gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5767     }
5768     else
5769     {
             /* No copy is performed, but execution still continues to the flag
              * and size update below. */
5770         ERR("Invalid location (%#x) specified.\n", location);
5771     }
5772
5773     surface->flags |= location;
5774     surface->ds_current_size.cx = surface->resource.width;
5775     surface->ds_current_size.cy = surface->resource.height;
5776 }
5777 /* Update the set of up to date locations recorded in surface->flags.
      *
      * With "persistent" set, "location" replaces the current SFLAG_LOCATIONS
      * bits, and emulated overlays attached to the surface are redrawn if the
      * new set contains SFLAG_INDRAWABLE. Without it, the bits in "location" are
      * cleared from surface->flags instead.
      *
      * When EXT_TEXTURE_SRGB_DECODE is supported, naming either texture location
      * implies both. If the surface belongs to a texture, that texture is marked
      * dirty when a persistent update leaves out a texture location that was
      * current, or when a non-persistent update names a texture location while
      * one is current. An ERR is logged if no location is left at the end. */
5778 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5779 {
5780     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5781     const DWORD texture_locations = SFLAG_INTEXTURE | SFLAG_INSRGBTEX;
5782     struct wined3d_surface *overlay;
5783     BOOL notify_container;
5784
5785     TRACE("surface %p, location %s, persistent %#x.\n",
5786             surface, debug_surflocation(location), persistent);
5787
5788     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5789     {
5790         if (!(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL) && (location & SFLAG_INDRAWABLE))
5791             ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5792     }
5793
5794     if ((location & texture_locations) && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5795         location |= texture_locations;
5796
5797     if (!persistent)
5798     {
             /* Invalidate the named locations only. */
5799         notify_container = (surface->flags & texture_locations) && (location & texture_locations);
5800         if (notify_container && surface->container.type == WINED3D_CONTAINER_TEXTURE)
5801         {
5802             TRACE("Passing to container\n");
5803             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5804         }
5805         surface->flags &= ~location;
5806     }
5807     else
5808     {
             /* The named locations become the only valid ones. */
5809         notify_container = ((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5810                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX));
5811         if (notify_container && surface->container.type == WINED3D_CONTAINER_TEXTURE)
5812         {
5813             TRACE("Passing to container.\n");
5814             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5815         }
5816         surface->flags = (surface->flags & ~SFLAG_LOCATIONS) | location;
5817
             /* Redraw emulated overlays, if any */
5818         if ((location & SFLAG_INDRAWABLE) && !list_empty(&surface->overlays))
5819         {
5820             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5821                 surface_draw_overlay(overlay);
5822         }
5823     }
5824
5825     if (!(surface->flags & SFLAG_LOCATIONS))
5826         ERR("Surface %p does not have any up to date location.\n", surface);
5827 }
5836 /* Translate a single surface location flag into the resource access type
      * needed to use it: WINED3D_RESOURCE_ACCESS_CPU for system memory,
      * WINED3D_RESOURCE_ACCESS_GPU for the drawable, both textures and both
      * renderbuffers. Any other value, including combinations of flags, logs a
      * FIXME and yields 0. */
5837 static DWORD resource_access_from_location(DWORD location)
5838 {
5839     if (location == SFLAG_INSYSMEM)
5840         return WINED3D_RESOURCE_ACCESS_CPU;
5841
5842     if (location == SFLAG_INDRAWABLE || location == SFLAG_INSRGBTEX
5843             || location == SFLAG_INTEXTURE || location == SFLAG_INRB_MULTISAMPLE
5844             || location == SFLAG_INRB_RESOLVED)
5845         return WINED3D_RESOURCE_ACCESS_GPU;
5846
5847     FIXME("Unhandled location %#x.\n", location);
5848     return 0;
5849 }
5856 /* Fill the system memory copy of a surface from another location.
      *
      * Renderbuffer contents are first brought into the RGB texture. After that
      * the data is downloaded from a texture if one is current (the sRGB one
      * only when the RGB texture is not), or read back from the framebuffer if
      * only the drawable is current. If none of these apply a FIXME is logged
      * and nothing is loaded. "rect" is only used for the framebuffer read. */
5857 static void surface_load_sysmem(struct wined3d_surface *surface,
5858         const struct wined3d_gl_info *gl_info, const RECT *rect)
5859 {
5860     const DWORD renderbuffer_locations = SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED;
5861     const DWORD texture_locations = SFLAG_INTEXTURE | SFLAG_INSRGBTEX;
5862
5863     surface_prepare_system_memory(surface);
5864
5865     if (surface->flags & renderbuffer_locations)
5866         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5867
5868     if (surface->flags & texture_locations)
5869     {
5870         /* TODO: Use already acquired context when possible. */
5871         struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
5872         BOOL srgb_only = !(surface->flags & SFLAG_INTEXTURE);
5873
             /* Download the surface to system memory. */
5874         surface_bind_and_dirtify(surface, context, srgb_only);
5875         surface_download_data(surface, gl_info);
5876         context_release(context);
5877     }
5878     else if (surface->flags & SFLAG_INDRAWABLE)
5879     {
5880         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5881                 wined3d_surface_get_pitch(surface));
5882     }
5883     else
5884     {
5885         FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5886                 surface, surface->flags & SFLAG_LOCATIONS);
5887     }
5888 }
5892 /* Bring the SFLAG_INDRAWABLE location of a surface up to date.
      *
      * If the RGB texture is current (it is loaded first when the render target
      * lock mode is RTL_READTEX), it is drawn to the drawable with
      * surface_blt_to_drawable(). Otherwise the system memory copy is pushed to
      * the framebuffer through flush_to_framebuffer_drawpixels(), going through
      * a temporary converted copy when d3dfmt_get_conv() asks for a conversion.
      *
      * Returns WINED3D_OK on success, WINED3DERR_INVALIDCALL when asked to load
      * an offscreen surface while FBOs are used for offscreen rendering, and
      * E_OUTOFMEMORY when the temporary conversion buffer can't be allocated. */
5893 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5894         const struct wined3d_gl_info *gl_info, const RECT *rect)
5895 {
5896     struct wined3d_device *device = surface->resource.device;
5897     enum wined3d_conversion_type convert;
5898     struct wined3d_format format;
5899     UINT byte_count;
5900     BYTE *mem;
5901
5902     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5903     {
5904         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5905         return WINED3DERR_INVALIDCALL;
5906     }
5907
         /* The result of this load is not checked; the SFLAG_INTEXTURE test
          * below decides which path is taken. */
5908     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5909         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5910
5911     if (surface->flags & SFLAG_INTEXTURE)
5912     {
5913         RECT r;
5914
             /* Source and destination are the same surface and the same
              * rectangle: the texture is drawn onto its own drawable. */
5915         surface_get_rect(surface, rect, &r);
5916         surface_blt_to_drawable(device, WINED3D_TEXF_POINT, FALSE, surface, &r, surface, &r);
5917
5918         return WINED3D_OK;
5919     }
5920
5921     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5922     {
5923         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5924          * path through sysmem. */
5925         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5926     }
5927
5928     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5929
5930     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5931      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5932      * called. */
5933     if ((convert != WINED3D_CT_NONE) && (surface->flags & SFLAG_PBO))
5934     {
5935         struct wined3d_context *context;
5936
5937         TRACE("Removing the pbo attached to surface %p.\n", surface);
5938
5939         /* TODO: Use already acquired context when possible. */
5940         context = context_acquire(device, NULL);
5941
5942         surface_remove_pbo(surface, gl_info);
5943
5944         context_release(context);
5945     }
5946
5947     if ((convert != WINED3D_CT_NONE) && surface->resource.allocatedMemory)
5948     {
5949         UINT height = surface->resource.height;
5950         UINT width = surface->resource.width;
5951         UINT src_pitch, dst_pitch;
5952
5953         byte_count = format.conv_byte_count;
5954         src_pitch = wined3d_surface_get_pitch(surface);
5955
5956         /* Stick to the alignment for the converted surface too, makes it
5957          * easier to load the surface. */
5958         dst_pitch = width * byte_count;
             /* Round up to a multiple of surface_alignment; the mask form
              * assumes the alignment is a power of two. */
5959         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5960
5961         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5962         {
5963             ERR("Out of memory (%u).\n", dst_pitch * height);
5964             return E_OUTOFMEMORY;
5965         }
5966
5967         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5968                 src_pitch, width, height, dst_pitch, convert, surface);
5969
5970         surface->flags |= SFLAG_CONVERTED;
5971     }
5972     else
5973     {
             /* No conversion: hand the surface's own memory to the upload. */
5974         surface->flags &= ~SFLAG_CONVERTED;
5975         mem = surface->resource.allocatedMemory;
5976         byte_count = format.byte_count;
5977     }
5978
5979     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5980
5981     /* Don't delete PBO memory. */
         /* mem only differs from allocatedMemory when the temporary conversion
          * buffer was allocated above. */
5982     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5983         HeapFree(GetProcessHeap(), 0, mem);
5984
5985     return WINED3D_OK;
5986 }
5987 /* Bring a texture location of a surface up to date: SFLAG_INSRGBTEX when
      * "srgb" is set, SFLAG_INTEXTURE otherwise.
      *
      * The sources are tried in this order:
      *  1. without FBOs, an offscreen surface whose drawable is current is
      *     loaded through surface_load_fb_texture();
      *  2. if a texture location is current, the format can be attached to an
      *     FBO as sRGB and FBO blits are supported, the data is blitted between
      *     the RGB and sRGB textures;
      *  3. if a renderbuffer location is current and FBO blits are usable, the
      *     data is blitted from the renderbuffer;
      *  4. otherwise the data is uploaded from system memory, downloading it
      *     there first if needed and converting it through a temporary buffer
      *     if the format or colour keying requires that.
      *
      * Returns WINED3D_OK on success or E_OUTOFMEMORY if a temporary conversion
      * buffer can't be allocated. */
5988 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5989         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5990 {
5991     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5992     struct wined3d_device *device = surface->resource.device;
5993     enum wined3d_conversion_type convert;
5994     struct wined3d_context *context;
5995     UINT width, src_pitch, dst_pitch;
5996     struct wined3d_bo_address data;
5997     struct wined3d_format format;
5998     POINT dst_point = {0, 0};
5999     BYTE *mem;
6000
6001     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6002             && surface_is_offscreen(surface)
6003             && (surface->flags & SFLAG_INDRAWABLE))
6004     {
6005         surface_load_fb_texture(surface, srgb);
6006
6007         return WINED3D_OK;
6008     }
6009
         /* Texture to texture: the whole surface is blitted from the other
          * colour space's texture into the requested one. */
6010     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6011             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
6012             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6013                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6014                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6015     {
6016         if (srgb)
6017             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INTEXTURE,
6018                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6019         else
6020             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INSRGBTEX,
6021                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6022
6023         return WINED3D_OK;
6024     }
6025
         /* Renderbuffer to texture. For an sRGB destination the format has to
          * be attachable to an FBO as sRGB. */
6026     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6027             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6028             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6029                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6030                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6031     {
             /* The resolved renderbuffer is preferred over the multisampled one
              * when both are current. */
6032         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6033         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
             /* NOTE(review): this local shadows the "rect" parameter inside
              * this block and holds the same values as src_rect. */
6034         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6035
6036         surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, src_location,
6037                 &rect, surface, dst_location, &rect);
6038
6039         return WINED3D_OK;
6040     }
6041
6042     /* Upload from system memory */
6043
6044     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6045             TRUE /* We will use textures */, &format, &convert);
6046
         /* If the only copy of the data is in the texture of the other colour
          * space, fetch it into system memory first. */
6047     if (srgb)
6048     {
6049         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6050         {
6051             /* Performance warning... */
6052             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6053             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6054         }
6055     }
6056     else
6057     {
6058         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6059         {
6060             /* Performance warning... */
6061             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6062             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6063         }
6064     }
6065
6066     if (!(surface->flags & SFLAG_INSYSMEM))
6067     {
6068         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6069         /* Lets hope we get it from somewhere... */
6070         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6071     }
6072
6073     /* TODO: Use already acquired context when possible. */
6074     context = context_acquire(device, NULL);
6075
6076     surface_prepare_texture(surface, context, srgb);
6077     surface_bind_and_dirtify(surface, context, srgb);
6078
         /* Record whether, and with which key, this upload is colour keyed. */
6079     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6080     {
6081         surface->flags |= SFLAG_GLCKEY;
6082         surface->gl_color_key = surface->src_blt_color_key;
6083     }
6084     else surface->flags &= ~SFLAG_GLCKEY;
6085
6086     width = surface->resource.width;
6087     src_pitch = wined3d_surface_get_pitch(surface);
6088
6089     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6090      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6091      * called. */
6092     if ((convert != WINED3D_CT_NONE || format.convert) && (surface->flags & SFLAG_PBO))
6093     {
6094         TRACE("Removing the pbo attached to surface %p.\n", surface);
6095         surface_remove_pbo(surface, gl_info);
6096     }
6097
6098     if (format.convert)
6099     {
6100         /* This code is entered for texture formats which need a fixup. */
6101         UINT height = surface->resource.height;
6102
6103         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6104         dst_pitch = width * format.conv_byte_count;
6105         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6106
6107         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6108         {
6109             ERR("Out of memory (%u).\n", dst_pitch * height);
6110             context_release(context);
6111             return E_OUTOFMEMORY;
6112         }
6113         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
             /* From here on the upload describes the converted copy. */
6114         format.byte_count = format.conv_byte_count;
6115         src_pitch = dst_pitch;
6116     }
6117     else if (convert != WINED3D_CT_NONE && surface->resource.allocatedMemory)
6118     {
6119         /* This code is only entered for color keying fixups */
6120         UINT height = surface->resource.height;
6121
6122         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6123         dst_pitch = width * format.conv_byte_count;
6124         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6125
6126         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6127         {
6128             ERR("Out of memory (%u).\n", dst_pitch * height);
6129             context_release(context);
6130             return E_OUTOFMEMORY;
6131         }
6132         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6133                 width, height, dst_pitch, convert, surface);
6134         format.byte_count = format.conv_byte_count;
6135         src_pitch = dst_pitch;
6136     }
6137     else
6138     {
             /* No conversion: upload straight from the surface's memory. */
6139         mem = surface->resource.allocatedMemory;
6140     }
6141
6142     data.buffer_object = surface->pbo;
6143     data.addr = mem;
6144     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6145
6146     context_release(context);
6147
6148     /* Don't delete PBO memory. */
         /* mem only differs from allocatedMemory when one of the temporary
          * conversion buffers was allocated above. */
6149     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6150         HeapFree(GetProcessHeap(), 0, mem);
6151
6152     return WINED3D_OK;
6153 }
6154
6155 static void surface_multisample_resolve(struct wined3d_surface *surface)
6156 {
6157     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6158
6159     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6160         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6161
6162     surface_blt_fbo(surface->resource.device, WINED3D_TEXF_POINT,
6163             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6164 }
6165
6166 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6167 {
6168     struct wined3d_device *device = surface->resource.device;
6169     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6170     HRESULT hr;
6171
6172     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6173
6174     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6175     {
6176         if (location == SFLAG_INTEXTURE && surface->flags & SFLAG_INDRAWABLE)
6177         {
6178             struct wined3d_context *context = context_acquire(device, NULL);
6179             surface_load_ds_location(surface, context, location);
6180             context_release(context);
6181             return WINED3D_OK;
6182         }
6183         else if (location & surface->flags && surface->draw_binding != SFLAG_INDRAWABLE)
6184         {
6185             /* Already up to date, nothing to do. */
6186             return WINED3D_OK;
6187         }
6188         else
6189         {
6190             FIXME("Unimplemented copy from %s to %s for depth/stencil buffers.\n",
6191                     debug_surflocation(surface->flags & SFLAG_LOCATIONS), debug_surflocation(location));
6192             return WINED3DERR_INVALIDCALL;
6193         }
6194     }
6195
6196     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6197         location = SFLAG_INTEXTURE;
6198
6199     if (surface->flags & location)
6200     {
6201         TRACE("Location already up to date.\n");
6202
6203         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6204                 && surface_need_pbo(surface, gl_info))
6205             surface_load_pbo(surface, gl_info);
6206
6207         return WINED3D_OK;
6208     }
6209
6210     if (WARN_ON(d3d_surface))
6211     {
6212         DWORD required_access = resource_access_from_location(location);
6213         if ((surface->resource.access_flags & required_access) != required_access)
6214             WARN("Operation requires %#x access, but surface only has %#x.\n",
6215                     required_access, surface->resource.access_flags);
6216     }
6217
6218     if (!(surface->flags & SFLAG_LOCATIONS))
6219     {
6220         ERR("Surface %p does not have any up to date location.\n", surface);
6221         surface->flags |= SFLAG_LOST;
6222         return WINED3DERR_DEVICELOST;
6223     }
6224
6225     switch (location)
6226     {
6227         case SFLAG_INSYSMEM:
6228             surface_load_sysmem(surface, gl_info, rect);
6229             break;
6230
6231         case SFLAG_INDRAWABLE:
6232             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6233                 return hr;
6234             break;
6235
6236         case SFLAG_INRB_RESOLVED:
6237             surface_multisample_resolve(surface);
6238             break;
6239
6240         case SFLAG_INTEXTURE:
6241         case SFLAG_INSRGBTEX:
6242             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6243                 return hr;
6244             break;
6245
6246         default:
6247             ERR("Don't know how to handle location %#x.\n", location);
6248             break;
6249     }
6250
6251     if (!rect)
6252     {
6253         surface->flags |= location;
6254
6255         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6256             surface_evict_sysmem(surface);
6257     }
6258
6259     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6260             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6261     {
6262         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6263     }
6264
6265     return WINED3D_OK;
6266 }
6267
6268 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6269 {
6270     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6271
6272     /* Not on a swapchain - must be offscreen */
6273     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6274
6275     /* The front buffer is always onscreen */
6276     if (surface == swapchain->front_buffer) return FALSE;
6277
6278     /* If the swapchain is rendered to an FBO, the backbuffer is
6279      * offscreen, otherwise onscreen */
6280     return swapchain->render_to_fbo;
6281 }
6282
6283 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); there is nothing to release.
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6286
6287 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6288 /* Context activation is done by the caller. */
6289 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6290 {
6291     BYTE table[256][4];
6292     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) != 0;
6293     GLenum target;
6294
6295     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
6296         target = surface->container.u.texture->target;
6297     else
6298         target = surface->texture_target;
6299
6300     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6301
6302     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6303     GL_EXTCALL(glColorTableEXT(target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6304 }
6305
6306 /* Context activation is done by the caller. */
6307 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6308 {
6309     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6310     const struct wined3d_gl_info *gl_info = context->gl_info;
6311     GLenum target;
6312
6313     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
6314         target = surface->container.u.texture->target;
6315     else
6316         target = surface->texture_target;
6317
6318     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6319      * else the surface is converted in software at upload time in LoadLocation.
6320      */
6321     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6322             && gl_info->supported[EXT_PALETTED_TEXTURE])
6323         ffp_blit_p8_upload_palette(surface, gl_info);
6324
6325     gl_info->gl_ops.gl.p_glEnable(target);
6326     checkGLcall("glEnable(target)");
6327
6328     return WINED3D_OK;
6329 }
6330
6331 /* Context activation is done by the caller. */
6332 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6333 {
6334     gl_info->gl_ops.gl.p_glDisable(GL_TEXTURE_2D);
6335     checkGLcall("glDisable(GL_TEXTURE_2D)");
6336     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6337     {
6338         gl_info->gl_ops.gl.p_glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6339         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6340     }
6341     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6342     {
6343         gl_info->gl_ops.gl.p_glDisable(GL_TEXTURE_RECTANGLE_ARB);
6344         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6345     }
6346 }
6347
6348 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6349         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6350         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6351 {
6352     enum complex_fixup src_fixup;
6353
6354     switch (blit_op)
6355     {
6356         case WINED3D_BLIT_OP_COLOR_BLIT:
6357             if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
6358                 return FALSE;
6359
6360             src_fixup = get_complex_fixup(src_format->color_fixup);
6361             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6362             {
6363                 TRACE("Checking support for fixup:\n");
6364                 dump_color_fixup_desc(src_format->color_fixup);
6365             }
6366
6367             if (!is_identity_fixup(dst_format->color_fixup))
6368             {
6369                 TRACE("Destination fixups are not supported\n");
6370                 return FALSE;
6371             }
6372
6373             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6374             {
6375                 TRACE("P8 fixup supported\n");
6376                 return TRUE;
6377             }
6378
6379             /* We only support identity conversions. */
6380             if (is_identity_fixup(src_format->color_fixup))
6381             {
6382                 TRACE("[OK]\n");
6383                 return TRUE;
6384             }
6385
6386             TRACE("[FAILED]\n");
6387             return FALSE;
6388
6389         case WINED3D_BLIT_OP_COLOR_FILL:
6390             if (dst_pool == WINED3D_POOL_SYSTEM_MEM)
6391                 return FALSE;
6392
6393             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6394             {
6395                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6396                     return FALSE;
6397             }
6398             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6399             {
6400                 TRACE("Color fill not supported\n");
6401                 return FALSE;
6402             }
6403
6404             /* FIXME: We should reject color fills on formats with fixups,
6405              * but this would break P8 color fills for example. */
6406
6407             return TRUE;
6408
6409         case WINED3D_BLIT_OP_DEPTH_FILL:
6410             return TRUE;
6411
6412         default:
6413             TRACE("Unsupported blit_op=%d\n", blit_op);
6414             return FALSE;
6415     }
6416 }
6417
6418 /* Do not call while under the GL lock. */
6419 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6420         const RECT *dst_rect, const struct wined3d_color *color)
6421 {
6422     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6423     struct wined3d_fb_state fb = {&dst_surface, NULL};
6424
6425     device_clear_render_targets(device, 1, &fb, 1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6426
6427     return WINED3D_OK;
6428 }
6429
6430 /* Do not call while under the GL lock. */
6431 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6432         struct wined3d_surface *surface, const RECT *rect, float depth)
6433 {
6434     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6435     struct wined3d_fb_state fb = {NULL, surface};
6436
6437     device_clear_render_targets(device, 0, &fb, 1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6438
6439     return WINED3D_OK;
6440 }
6441
/* Fixed-function pipeline blitter: a struct blit_shader initialised
 * positionally with the ffp_blit_* callbacks defined above. The order of the
 * entries has to match the member order of struct blit_shader. */
const struct blit_shader ffp_blit =  {
    ffp_blit_alloc,
    ffp_blit_free,
    ffp_blit_set,
    ffp_blit_unset,
    ffp_blit_supported,
    ffp_blit_color_fill,
    ffp_blit_depth_fill,
};
6451
/* No-op: nothing is allocated here, success is always reported. */
static HRESULT cpu_blit_alloc(struct wined3d_device *device)
{
    return WINED3D_OK;
}
6456
/* No-op counterpart of cpu_blit_alloc(); there is nothing to release.
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6461
/* No-op: no state is set up here, success is always reported.
 * Context activation is done by the caller. */
static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
{
    return WINED3D_OK;
}
6467
/* No-op counterpart of cpu_blit_set(); there is nothing to undo.
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6472
6473 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6474         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6475         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6476 {
6477     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6478     {
6479         return TRUE;
6480     }
6481
6482     return FALSE;
6483 }
6484
6485 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6486         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6487         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6488 {
6489     UINT row_block_count;
6490     const BYTE *src_row;
6491     BYTE *dst_row;
6492     UINT x, y;
6493
6494     src_row = src_data;
6495     dst_row = dst_data;
6496
6497     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6498
6499     if (!flags)
6500     {
6501         for (y = 0; y < update_h; y += format->block_height)
6502         {
6503             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6504             src_row += src_pitch;
6505             dst_row += dst_pitch;
6506         }
6507
6508         return WINED3D_OK;
6509     }
6510
6511     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6512     {
6513         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6514
6515         switch (format->id)
6516         {
6517             case WINED3DFMT_DXT1:
6518                 for (y = 0; y < update_h; y += format->block_height)
6519                 {
6520                     struct block
6521                     {
6522                         WORD color[2];
6523                         BYTE control_row[4];
6524                     };
6525
6526                     const struct block *s = (const struct block *)src_row;
6527                     struct block *d = (struct block *)dst_row;
6528
6529                     for (x = 0; x < row_block_count; ++x)
6530                     {
6531                         d[x].color[0] = s[x].color[0];
6532                         d[x].color[1] = s[x].color[1];
6533                         d[x].control_row[0] = s[x].control_row[3];
6534                         d[x].control_row[1] = s[x].control_row[2];
6535                         d[x].control_row[2] = s[x].control_row[1];
6536                         d[x].control_row[3] = s[x].control_row[0];
6537                     }
6538                     src_row -= src_pitch;
6539                     dst_row += dst_pitch;
6540                 }
6541                 return WINED3D_OK;
6542
6543             case WINED3DFMT_DXT3:
6544                 for (y = 0; y < update_h; y += format->block_height)
6545                 {
6546                     struct block
6547                     {
6548                         WORD alpha_row[4];
6549                         WORD color[2];
6550                         BYTE control_row[4];
6551                     };
6552
6553                     const struct block *s = (const struct block *)src_row;
6554                     struct block *d = (struct block *)dst_row;
6555
6556                     for (x = 0; x < row_block_count; ++x)
6557                     {
6558                         d[x].alpha_row[0] = s[x].alpha_row[3];
6559                         d[x].alpha_row[1] = s[x].alpha_row[2];
6560                         d[x].alpha_row[2] = s[x].alpha_row[1];
6561                         d[x].alpha_row[3] = s[x].alpha_row[0];
6562                         d[x].color[0] = s[x].color[0];
6563                         d[x].color[1] = s[x].color[1];
6564                         d[x].control_row[0] = s[x].control_row[3];
6565                         d[x].control_row[1] = s[x].control_row[2];
6566                         d[x].control_row[2] = s[x].control_row[1];
6567                         d[x].control_row[3] = s[x].control_row[0];
6568                     }
6569                     src_row -= src_pitch;
6570                     dst_row += dst_pitch;
6571                 }
6572                 return WINED3D_OK;
6573
6574             default:
6575                 FIXME("Compressed flip not implemented for format %s.\n",
6576                         debug_d3dformat(format->id));
6577                 return E_NOTIMPL;
6578         }
6579     }
6580
6581     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6582             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6583
6584     return E_NOTIMPL;
6585 }
6586
6587 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6588         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6589         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
6590 {
6591     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6592     const struct wined3d_format *src_format, *dst_format;
6593     struct wined3d_surface *orig_src = src_surface;
6594     struct wined3d_map_desc dst_map, src_map;
6595     const BYTE *sbase = NULL;
6596     HRESULT hr = WINED3D_OK;
6597     const BYTE *sbuf;
6598     BYTE *dbuf;
6599     int x, y;
6600
6601     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6602             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6603             flags, fx, debug_d3dtexturefiltertype(filter));
6604
6605     if (src_surface == dst_surface)
6606     {
6607         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6608         src_map = dst_map;
6609         src_format = dst_surface->resource.format;
6610         dst_format = src_format;
6611     }
6612     else
6613     {
6614         dst_format = dst_surface->resource.format;
6615         if (src_surface)
6616         {
6617             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6618             {
6619                 src_surface = surface_convert_format(src_surface, dst_format->id);
6620                 if (!src_surface)
6621                 {
6622                     /* The conv function writes a FIXME */
6623                     WARN("Cannot convert source surface format to dest format.\n");
6624                     goto release;
6625                 }
6626             }
6627             wined3d_surface_map(src_surface, &src_map, NULL, WINED3D_MAP_READONLY);
6628             src_format = src_surface->resource.format;
6629         }
6630         else
6631         {
6632             src_format = dst_format;
6633         }
6634
6635         wined3d_surface_map(dst_surface, &dst_map, dst_rect, 0);
6636     }
6637
6638     bpp = dst_surface->resource.format->byte_count;
6639     srcheight = src_rect->bottom - src_rect->top;
6640     srcwidth = src_rect->right - src_rect->left;
6641     dstheight = dst_rect->bottom - dst_rect->top;
6642     dstwidth = dst_rect->right - dst_rect->left;
6643     width = (dst_rect->right - dst_rect->left) * bpp;
6644
6645     if (src_surface)
6646         sbase = (BYTE *)src_map.data
6647                 + ((src_rect->top / src_format->block_height) * src_map.row_pitch)
6648                 + ((src_rect->left / src_format->block_width) * src_format->block_byte_count);
6649     if (src_surface != dst_surface)
6650         dbuf = dst_map.data;
6651     else
6652         dbuf = (BYTE *)dst_map.data
6653                 + ((dst_rect->top / dst_format->block_height) * dst_map.row_pitch)
6654                 + ((dst_rect->left / dst_format->block_width) * dst_format->block_byte_count);
6655
6656     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6657     {
6658         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6659
6660         if (src_surface == dst_surface)
6661         {
6662             FIXME("Only plain blits supported on compressed surfaces.\n");
6663             hr = E_NOTIMPL;
6664             goto release;
6665         }
6666
6667         if (srcheight != dstheight || srcwidth != dstwidth)
6668         {
6669             WARN("Stretching not supported on compressed surfaces.\n");
6670             hr = WINED3DERR_INVALIDCALL;
6671             goto release;
6672         }
6673
6674         if (!surface_check_block_align(src_surface, src_rect))
6675         {
6676             WARN("Source rectangle not block-aligned.\n");
6677             hr = WINED3DERR_INVALIDCALL;
6678             goto release;
6679         }
6680
6681         if (!surface_check_block_align(dst_surface, dst_rect))
6682         {
6683             WARN("Destination rectangle not block-aligned.\n");
6684             hr = WINED3DERR_INVALIDCALL;
6685             goto release;
6686         }
6687
6688         hr = surface_cpu_blt_compressed(sbase, dbuf,
6689                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6690                 src_format, flags, fx);
6691         goto release;
6692     }
6693
6694     /* First, all the 'source-less' blits */
6695     if (flags & WINEDDBLT_COLORFILL)
6696     {
6697         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6698         flags &= ~WINEDDBLT_COLORFILL;
6699     }
6700
6701     if (flags & WINEDDBLT_DEPTHFILL)
6702     {
6703         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6704     }
6705     if (flags & WINEDDBLT_ROP)
6706     {
6707         /* Catch some degenerate cases here. */
6708         switch (fx->dwROP)
6709         {
6710             case BLACKNESS:
6711                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6712                 break;
6713             case 0xaa0029: /* No-op */
6714                 break;
6715             case WHITENESS:
6716                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6717                 break;
6718             case SRCCOPY: /* Well, we do that below? */
6719                 break;
6720             default:
6721                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6722                 goto error;
6723         }
6724         flags &= ~WINEDDBLT_ROP;
6725     }
6726     if (flags & WINEDDBLT_DDROPS)
6727     {
6728         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6729     }
6730     /* Now the 'with source' blits. */
6731     if (src_surface)
6732     {
6733         int sx, xinc, sy, yinc;
6734
6735         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6736             goto release;
6737
6738         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT
6739                 && (srcwidth != dstwidth || srcheight != dstheight))
6740         {
6741             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6742             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6743         }
6744
6745         xinc = (srcwidth << 16) / dstwidth;
6746         yinc = (srcheight << 16) / dstheight;
6747
6748         if (!flags)
6749         {
6750             /* No effects, we can cheat here. */
6751             if (dstwidth == srcwidth)
6752             {
6753                 if (dstheight == srcheight)
6754                 {
6755                     /* No stretching in either direction. This needs to be as
6756                      * fast as possible. */
6757                     sbuf = sbase;
6758
6759                     /* Check for overlapping surfaces. */
6760                     if (src_surface != dst_surface || dst_rect->top < src_rect->top
6761                             || dst_rect->right <= src_rect->left || src_rect->right <= dst_rect->left)
6762                     {
6763                         /* No overlap, or dst above src, so copy from top downwards. */
6764                         for (y = 0; y < dstheight; ++y)
6765                         {
6766                             memcpy(dbuf, sbuf, width);
6767                             sbuf += src_map.row_pitch;
6768                             dbuf += dst_map.row_pitch;
6769                         }
6770                     }
6771                     else if (dst_rect->top > src_rect->top)
6772                     {
6773                         /* Copy from bottom upwards. */
6774                         sbuf += src_map.row_pitch * dstheight;
6775                         dbuf += dst_map.row_pitch * dstheight;
6776                         for (y = 0; y < dstheight; ++y)
6777                         {
6778                             sbuf -= src_map.row_pitch;
6779                             dbuf -= dst_map.row_pitch;
6780                             memcpy(dbuf, sbuf, width);
6781                         }
6782                     }
6783                     else
6784                     {
6785                         /* Src and dst overlapping on the same line, use memmove. */
6786                         for (y = 0; y < dstheight; ++y)
6787                         {
6788                             memmove(dbuf, sbuf, width);
6789                             sbuf += src_map.row_pitch;
6790                             dbuf += dst_map.row_pitch;
6791                         }
6792                     }
6793                 }
6794                 else
6795                 {
6796                     /* Stretching in y direction only. */
6797                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6798                     {
6799                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6800                         memcpy(dbuf, sbuf, width);
6801                         dbuf += dst_map.row_pitch;
6802                     }
6803                 }
6804             }
6805             else
6806             {
6807                 /* Stretching in X direction. */
6808                 int last_sy = -1;
6809                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6810                 {
6811                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6812
6813                     if ((sy >> 16) == (last_sy >> 16))
6814                     {
6815                         /* This source row is the same as last source row -
6816                          * Copy the already stretched row. */
6817                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6818                     }
6819                     else
6820                     {
6821 #define STRETCH_ROW(type) \
6822 do { \
6823     const type *s = (const type *)sbuf; \
6824     type *d = (type *)dbuf; \
6825     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6826         d[x] = s[sx >> 16]; \
6827 } while(0)
6828
6829                         switch(bpp)
6830                         {
6831                             case 1:
6832                                 STRETCH_ROW(BYTE);
6833                                 break;
6834                             case 2:
6835                                 STRETCH_ROW(WORD);
6836                                 break;
6837                             case 4:
6838                                 STRETCH_ROW(DWORD);
6839                                 break;
6840                             case 3:
6841                             {
6842                                 const BYTE *s;
6843                                 BYTE *d = dbuf;
6844                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6845                                 {
6846                                     DWORD pixel;
6847
6848                                     s = sbuf + 3 * (sx >> 16);
6849                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6850                                     d[0] = (pixel      ) & 0xff;
6851                                     d[1] = (pixel >>  8) & 0xff;
6852                                     d[2] = (pixel >> 16) & 0xff;
6853                                     d += 3;
6854                                 }
6855                                 break;
6856                             }
6857                             default:
6858                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6859                                 hr = WINED3DERR_NOTAVAILABLE;
6860                                 goto error;
6861                         }
6862 #undef STRETCH_ROW
6863                     }
6864                     dbuf += dst_map.row_pitch;
6865                     last_sy = sy;
6866                 }
6867             }
6868         }
6869         else
6870         {
6871             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6872             DWORD keylow = 0xffffffff, keyhigh = 0, keymask = 0xffffffff;
6873             DWORD destkeylow = 0x0, destkeyhigh = 0xffffffff, destkeymask = 0xffffffff;
6874             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6875             {
6876                 /* The color keying flags are checked for correctness in ddraw */
6877                 if (flags & WINEDDBLT_KEYSRC)
6878                 {
6879                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6880                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6881                 }
6882                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6883                 {
6884                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6885                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6886                 }
6887
6888                 if (flags & WINEDDBLT_KEYDEST)
6889                 {
6890                     /* Destination color keys are taken from the source surface! */
6891                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6892                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6893                 }
6894                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6895                 {
6896                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6897                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6898                 }
6899
6900                 if (bpp == 1)
6901                 {
6902                     keymask = 0xff;
6903                 }
6904                 else
6905                 {
6906                     DWORD masks[3];
6907                     get_color_masks(src_format, masks);
6908                     keymask = masks[0]
6909                             | masks[1]
6910                             | masks[2];
6911                 }
6912                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6913             }
6914
6915             if (flags & WINEDDBLT_DDFX)
6916             {
6917                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6918                 LONG tmpxy;
6919                 dTopLeft     = dbuf;
6920                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6921                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6922                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6923
6924                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6925                 {
6926                     /* I don't think we need to do anything about this flag */
6927                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6928                 }
6929                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6930                 {
6931                     tmp          = dTopRight;
6932                     dTopRight    = dTopLeft;
6933                     dTopLeft     = tmp;
6934                     tmp          = dBottomRight;
6935                     dBottomRight = dBottomLeft;
6936                     dBottomLeft  = tmp;
6937                     dstxinc = dstxinc * -1;
6938                 }
6939                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6940                 {
6941                     tmp          = dTopLeft;
6942                     dTopLeft     = dBottomLeft;
6943                     dBottomLeft  = tmp;
6944                     tmp          = dTopRight;
6945                     dTopRight    = dBottomRight;
6946                     dBottomRight = tmp;
6947                     dstyinc = dstyinc * -1;
6948                 }
6949                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6950                 {
6951                     /* I don't think we need to do anything about this flag */
6952                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6953                 }
6954                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6955                 {
6956                     tmp          = dBottomRight;
6957                     dBottomRight = dTopLeft;
6958                     dTopLeft     = tmp;
6959                     tmp          = dBottomLeft;
6960                     dBottomLeft  = dTopRight;
6961                     dTopRight    = tmp;
6962                     dstxinc = dstxinc * -1;
6963                     dstyinc = dstyinc * -1;
6964                 }
6965                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6966                 {
6967                     tmp          = dTopLeft;
6968                     dTopLeft     = dBottomLeft;
6969                     dBottomLeft  = dBottomRight;
6970                     dBottomRight = dTopRight;
6971                     dTopRight    = tmp;
6972                     tmpxy   = dstxinc;
6973                     dstxinc = dstyinc;
6974                     dstyinc = tmpxy;
6975                     dstxinc = dstxinc * -1;
6976                 }
6977                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6978                 {
6979                     tmp          = dTopLeft;
6980                     dTopLeft     = dTopRight;
6981                     dTopRight    = dBottomRight;
6982                     dBottomRight = dBottomLeft;
6983                     dBottomLeft  = tmp;
6984                     tmpxy   = dstxinc;
6985                     dstxinc = dstyinc;
6986                     dstyinc = tmpxy;
6987                     dstyinc = dstyinc * -1;
6988                 }
6989                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6990                 {
6991                     /* I don't think we need to do anything about this flag */
6992                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6993                 }
6994                 dbuf = dTopLeft;
6995                 flags &= ~(WINEDDBLT_DDFX);
6996             }
6997
6998 #define COPY_COLORKEY_FX(type) \
6999 do { \
7000     const type *s; \
7001     type *d = (type *)dbuf, *dx, tmp; \
7002     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7003     { \
7004         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
7005         dx = d; \
7006         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7007         { \
7008             tmp = s[sx >> 16]; \
7009             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7010                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7011             { \
7012                 dx[0] = tmp; \
7013             } \
7014             dx = (type *)(((BYTE *)dx) + dstxinc); \
7015         } \
7016         d = (type *)(((BYTE *)d) + dstyinc); \
7017     } \
7018 } while(0)
7019
7020             switch (bpp)
7021             {
7022                 case 1:
7023                     COPY_COLORKEY_FX(BYTE);
7024                     break;
7025                 case 2:
7026                     COPY_COLORKEY_FX(WORD);
7027                     break;
7028                 case 4:
7029                     COPY_COLORKEY_FX(DWORD);
7030                     break;
7031                 case 3:
7032                 {
7033                     const BYTE *s;
7034                     BYTE *d = dbuf, *dx;
7035                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7036                     {
7037                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7038                         dx = d;
7039                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7040                         {
7041                             DWORD pixel, dpixel = 0;
7042                             s = sbuf + 3 * (sx>>16);
7043                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7044                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7045                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7046                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7047                             {
7048                                 dx[0] = (pixel      ) & 0xff;
7049                                 dx[1] = (pixel >>  8) & 0xff;
7050                                 dx[2] = (pixel >> 16) & 0xff;
7051                             }
7052                             dx += dstxinc;
7053                         }
7054                         d += dstyinc;
7055                     }
7056                     break;
7057                 }
7058                 default:
7059                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7060                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7061                     hr = WINED3DERR_NOTAVAILABLE;
7062                     goto error;
7063 #undef COPY_COLORKEY_FX
7064             }
7065         }
7066     }
7067
7068 error:
7069     if (flags && FIXME_ON(d3d_surface))
7070     {
7071         FIXME("\tUnsupported flags: %#x.\n", flags);
7072     }
7073
7074 release:
7075     wined3d_surface_unmap(dst_surface);
7076     if (src_surface && src_surface != dst_surface)
7077         wined3d_surface_unmap(src_surface);
7078     /* Release the converted surface, if any. */
7079     if (src_surface && src_surface != orig_src)
7080         wined3d_surface_decref(src_surface);
7081
7082     return hr;
7083 }
7084
7085 /* Do not call while under the GL lock. */
7086 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7087         const RECT *dst_rect, const struct wined3d_color *color)
7088 {
7089     static const RECT src_rect;
7090     WINEDDBLTFX BltFx;
7091
7092     memset(&BltFx, 0, sizeof(BltFx));
7093     BltFx.dwSize = sizeof(BltFx);
7094     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7095     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7096             WINEDDBLT_COLORFILL, &BltFx, WINED3D_TEXF_POINT);
7097 }
7098
7099 /* Do not call while under the GL lock. */
7100 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7101         struct wined3d_surface *surface, const RECT *rect, float depth)
7102 {
7103     FIXME("Depth filling not implemented by cpu_blit.\n");
7104     return WINED3DERR_INVALIDCALL;
7105 }
7106
7107 const struct blit_shader cpu_blit =  {
7108     cpu_blit_alloc,
7109     cpu_blit_free,
7110     cpu_blit_set,
7111     cpu_blit_unset,
7112     cpu_blit_supported,
7113     cpu_blit_color_fill,
7114     cpu_blit_depth_fill,
7115 };
7116
7117 static HRESULT surface_init(struct wined3d_surface *surface, UINT alignment, UINT width, UINT height,
7118         enum wined3d_multisample_type multisample_type, UINT multisample_quality,
7119         struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7120         enum wined3d_pool pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7121 {
7122     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7123     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7124     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7125     unsigned int resource_size;
7126     HRESULT hr;
7127
7128     if (multisample_quality > 0)
7129     {
7130         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7131         multisample_quality = 0;
7132     }
7133
7134     /* Quick lockable sanity check.
7135      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7136      * this function is too deep to need to care about things like this.
7137      * Levels need to be checked too, since they all affect what can be done. */
7138     switch (pool)
7139     {
7140         case WINED3D_POOL_SCRATCH:
7141             if (!lockable)
7142             {
7143                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7144                         "which are mutually exclusive, setting lockable to TRUE.\n");
7145                 lockable = TRUE;
7146             }
7147             break;
7148
7149         case WINED3D_POOL_SYSTEM_MEM:
7150             if (!lockable)
7151                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7152             break;
7153
7154         case WINED3D_POOL_MANAGED:
7155             if (usage & WINED3DUSAGE_DYNAMIC)
7156                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7157             break;
7158
7159         case WINED3D_POOL_DEFAULT:
7160             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7161                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7162             break;
7163
7164         default:
7165             FIXME("Unknown pool %#x.\n", pool);
7166             break;
7167     };
7168
7169     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3D_POOL_DEFAULT)
7170         FIXME("Trying to create a render target that isn't in the default pool.\n");
7171
7172     /* FIXME: Check that the format is supported by the device. */
7173
7174     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7175     if (!resource_size)
7176         return WINED3DERR_INVALIDCALL;
7177
7178     if (device->wined3d->flags & WINED3D_NO3D)
7179         surface->surface_ops = &gdi_surface_ops;
7180     else
7181         surface->surface_ops = &surface_ops;
7182
7183     hr = resource_init(&surface->resource, device, WINED3D_RTYPE_SURFACE, format,
7184             multisample_type, multisample_quality, usage, pool, width, height, 1,
7185             resource_size, parent, parent_ops, &surface_resource_ops);
7186     if (FAILED(hr))
7187     {
7188         WARN("Failed to initialize resource, returning %#x.\n", hr);
7189         return hr;
7190     }
7191
7192     /* "Standalone" surface. */
7193     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7194
7195     list_init(&surface->overlays);
7196
7197     /* Flags */
7198     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7199     if (flags & WINED3D_SURFACE_DISCARD)
7200         surface->flags |= SFLAG_DISCARD;
7201     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7202         surface->flags |= SFLAG_PIN_SYSMEM;
7203     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7204         surface->flags |= SFLAG_LOCKABLE;
7205     /* I'm not sure if this qualifies as a hack or as an optimization. It
7206      * seems reasonable to assume that lockable render targets will get
7207      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7208      * creation. However, the other reason we want to do this is that several
7209      * ddraw applications access surface memory while the surface isn't
7210      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7211      * future locks prevents these from crashing. */
7212     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7213         surface->flags |= SFLAG_DYNLOCK;
7214
7215     /* Mark the texture as dirty so that it gets loaded first time around. */
7216     surface_add_dirty_rect(surface, NULL);
7217     list_init(&surface->renderbuffers);
7218
7219     TRACE("surface %p, memory %p, size %u\n",
7220             surface, surface->resource.allocatedMemory, surface->resource.size);
7221
7222     /* Call the private setup routine */
7223     hr = surface->surface_ops->surface_private_setup(surface);
7224     if (FAILED(hr))
7225     {
7226         ERR("Private setup failed, returning %#x\n", hr);
7227         surface_cleanup(surface);
7228         return hr;
7229     }
7230
7231     /* Similar to lockable rendertargets above, creating the DIB section
7232      * during surface initialization prevents the sysmem pointer from changing
7233      * after a wined3d_surface_getdc() call. */
7234     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7235             && SUCCEEDED(surface_create_dib_section(surface)))
7236     {
7237         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7238         surface->resource.heapMemory = NULL;
7239         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7240     }
7241
7242     return hr;
7243 }
7244
7245 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7246         enum wined3d_format_id format_id, DWORD usage, enum wined3d_pool pool,
7247         enum wined3d_multisample_type multisample_type, DWORD multisample_quality, DWORD flags,
7248         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7249 {
7250     struct wined3d_surface *object;
7251     HRESULT hr;
7252
7253     TRACE("device %p, width %u, height %u, format %s\n",
7254             device, width, height, debug_d3dformat(format_id));
7255     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7256             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7257     TRACE("flags %#x, parent %p, parent_ops %p.\n", flags, parent, parent_ops);
7258
7259     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7260     if (!object)
7261         return WINED3DERR_OUTOFVIDEOMEMORY;
7262
7263     if (FAILED(hr = surface_init(object, device->surface_alignment, width, height, multisample_type,
7264             multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops)))
7265     {
7266         WARN("Failed to initialize surface, returning %#x.\n", hr);
7267         HeapFree(GetProcessHeap(), 0, object);
7268         return hr;
7269     }
7270
7271     TRACE("Created surface %p.\n", object);
7272     *surface = object;
7273
7274     return hr;
7275 }