wined3d: Make shader buffer dynamic.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         enum wined3d_texture_filter_type filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         if (surface->texture_name)
61         {
62             TRACE("Deleting texture %u.\n", surface->texture_name);
63             gl_info->gl_ops.gl.p_glDeleteTextures(1, &surface->texture_name);
64         }
65
66         if (surface->flags & SFLAG_PBO)
67         {
68             TRACE("Deleting PBO %u.\n", surface->pbo);
69             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
70         }
71
72         if (surface->rb_multisample)
73         {
74             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
75             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
76         }
77
78         if (surface->rb_resolved)
79         {
80             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
81             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
82         }
83
84         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
85         {
86             TRACE("Deleting renderbuffer %u.\n", entry->id);
87             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
88             HeapFree(GetProcessHeap(), 0, entry);
89         }
90
91         context_release(context);
92     }
93
94     if (surface->flags & SFLAG_DIBSECTION)
95     {
96         DeleteDC(surface->hDC);
97         DeleteObject(surface->dib.DIBsection);
98         surface->dib.bitmap_data = NULL;
99         surface->resource.allocatedMemory = NULL;
100     }
101
102     if (surface->flags & SFLAG_USERPTR)
103         wined3d_surface_set_mem(surface, NULL);
104     if (surface->overlay_dest)
105         list_remove(&surface->overlay_entry);
106
107     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
108     {
109         list_remove(&overlay->overlay_entry);
110         overlay->overlay_dest = NULL;
111     }
112
113     resource_cleanup(&surface->resource);
114 }
115
116 void surface_update_draw_binding(struct wined3d_surface *surface)
117 {
118     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
119         surface->draw_binding = SFLAG_INDRAWABLE;
120     else if (surface->resource.multisample_type)
121         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
122     else
123         surface->draw_binding = SFLAG_INTEXTURE;
124 }
125
126 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
127 {
128     TRACE("surface %p, container %p.\n", surface, container);
129
130     if (!container && type != WINED3D_CONTAINER_NONE)
131         ERR("Setting NULL container of type %#x.\n", type);
132
133     if (type == WINED3D_CONTAINER_SWAPCHAIN)
134     {
135         surface->get_drawable_size = get_drawable_size_swapchain;
136     }
137     else
138     {
139         switch (wined3d_settings.offscreen_rendering_mode)
140         {
141             case ORM_FBO:
142                 surface->get_drawable_size = get_drawable_size_fbo;
143                 break;
144
145             case ORM_BACKBUFFER:
146                 surface->get_drawable_size = get_drawable_size_backbuffer;
147                 break;
148
149             default:
150                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
151                 return;
152         }
153     }
154
155     surface->container.type = type;
156     surface->container.u.base = container;
157     surface_update_draw_binding(surface);
158 }
159
160 struct blt_info
161 {
162     GLenum binding;
163     GLenum bind_target;
164     enum tex_types tex_type;
165     GLfloat coords[4][3];
166 };
167
/* Rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
175
176 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
177 {
178     f->l = ((r->left * 2.0f) / w) - 1.0f;
179     f->t = ((r->top * 2.0f) / h) - 1.0f;
180     f->r = ((r->right * 2.0f) / w) - 1.0f;
181     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
182 }
183
184 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
185 {
186     GLfloat (*coords)[3] = info->coords;
187     struct float_rect f;
188
189     switch (target)
190     {
191         default:
192             FIXME("Unsupported texture target %#x\n", target);
193             /* Fall back to GL_TEXTURE_2D */
194         case GL_TEXTURE_2D:
195             info->binding = GL_TEXTURE_BINDING_2D;
196             info->bind_target = GL_TEXTURE_2D;
197             info->tex_type = tex_2d;
198             coords[0][0] = (float)rect->left / w;
199             coords[0][1] = (float)rect->top / h;
200             coords[0][2] = 0.0f;
201
202             coords[1][0] = (float)rect->right / w;
203             coords[1][1] = (float)rect->top / h;
204             coords[1][2] = 0.0f;
205
206             coords[2][0] = (float)rect->left / w;
207             coords[2][1] = (float)rect->bottom / h;
208             coords[2][2] = 0.0f;
209
210             coords[3][0] = (float)rect->right / w;
211             coords[3][1] = (float)rect->bottom / h;
212             coords[3][2] = 0.0f;
213             break;
214
215         case GL_TEXTURE_RECTANGLE_ARB:
216             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
217             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
218             info->tex_type = tex_rect;
219             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
220             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
221             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
222             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
223             break;
224
225         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
226             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
227             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
228             info->tex_type = tex_cube;
229             cube_coords_float(rect, w, h, &f);
230
231             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
232             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
233             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
234             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
235             break;
236
237         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
238             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
239             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
240             info->tex_type = tex_cube;
241             cube_coords_float(rect, w, h, &f);
242
243             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
244             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
245             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
246             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
247             break;
248
249         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
250             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
251             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
252             info->tex_type = tex_cube;
253             cube_coords_float(rect, w, h, &f);
254
255             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
256             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
257             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
258             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
259             break;
260
261         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
262             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
263             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
264             info->tex_type = tex_cube;
265             cube_coords_float(rect, w, h, &f);
266
267             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
268             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
269             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
270             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
271             break;
272
273         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
274             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
275             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
276             info->tex_type = tex_cube;
277             cube_coords_float(rect, w, h, &f);
278
279             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
280             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
281             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
282             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
283             break;
284
285         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
286             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
287             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
288             info->tex_type = tex_cube;
289             cube_coords_float(rect, w, h, &f);
290
291             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
292             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
293             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
294             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
295             break;
296     }
297 }
298
299 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
300 {
301     if (rect_in)
302         *rect_out = *rect_in;
303     else
304     {
305         rect_out->left = 0;
306         rect_out->top = 0;
307         rect_out->right = surface->resource.width;
308         rect_out->bottom = surface->resource.height;
309     }
310 }
311
312 /* Context activation is done by the caller. */
313 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
314         const RECT *src_rect, const RECT *dst_rect, enum wined3d_texture_filter_type filter)
315 {
316     const struct wined3d_gl_info *gl_info = context->gl_info;
317     struct blt_info info;
318
319     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
320
321     gl_info->gl_ops.gl.p_glEnable(info.bind_target);
322     checkGLcall("glEnable(bind_target)");
323
324     context_bind_texture(context, info.bind_target, src_surface->texture_name);
325
326     /* Filtering for StretchRect */
327     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
328             wined3d_gl_mag_filter(magLookup, filter));
329     checkGLcall("glTexParameteri");
330     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
331             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
332     checkGLcall("glTexParameteri");
333     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
334     gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
335     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
336         gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
337     gl_info->gl_ops.gl.p_glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
338     checkGLcall("glTexEnvi");
339
340     /* Draw a quad */
341     gl_info->gl_ops.gl.p_glBegin(GL_TRIANGLE_STRIP);
342     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[0]);
343     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->left, dst_rect->top);
344
345     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[1]);
346     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->right, dst_rect->top);
347
348     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[2]);
349     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->left, dst_rect->bottom);
350
351     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[3]);
352     gl_info->gl_ops.gl.p_glVertex2i(dst_rect->right, dst_rect->bottom);
353     gl_info->gl_ops.gl.p_glEnd();
354
355     /* Unbind the texture */
356     context_bind_texture(context, info.bind_target, 0);
357
358     /* We changed the filtering settings on the texture. Inform the
359      * container about this to get the filters reset properly next draw. */
360     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
361     {
362         struct wined3d_texture *texture = src_surface->container.u.texture;
363         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3D_TEXF_POINT;
364         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3D_TEXF_POINT;
365         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3D_TEXF_NONE;
366         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
367     }
368 }
369
370 /* Works correctly only for <= 4 bpp formats. */
371 static void get_color_masks(const struct wined3d_format *format, DWORD *masks)
372 {
373     masks[0] = ((1 << format->red_size) - 1) << format->red_offset;
374     masks[1] = ((1 << format->green_size) - 1) << format->green_offset;
375     masks[2] = ((1 << format->blue_size) - 1) << format->blue_offset;
376 }
377
378 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
379 {
380     const struct wined3d_format *format = surface->resource.format;
381     SYSTEM_INFO sysInfo;
382     BITMAPINFO *b_info;
383     int extraline = 0;
384     DWORD *masks;
385
386     TRACE("surface %p.\n", surface);
387
388     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
389     {
390         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
391         return WINED3DERR_INVALIDCALL;
392     }
393
394     switch (format->byte_count)
395     {
396         case 2:
397         case 4:
398             /* Allocate extra space to store the RGB bit masks. */
399             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
400             break;
401
402         case 3:
403             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
404             break;
405
406         default:
407             /* Allocate extra space for a palette. */
408             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
409                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
410             break;
411     }
412
413     if (!b_info)
414         return E_OUTOFMEMORY;
415
416     /* Some applications access the surface in via DWORDs, and do not take
417      * the necessary care at the end of the surface. So we need at least
418      * 4 extra bytes at the end of the surface. Check against the page size,
419      * if the last page used for the surface has at least 4 spare bytes we're
420      * safe, otherwise add an extra line to the DIB section. */
421     GetSystemInfo(&sysInfo);
422     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
423     {
424         extraline = 1;
425         TRACE("Adding an extra line to the DIB section.\n");
426     }
427
428     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
429     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
430     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
431     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
432     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
433             * wined3d_surface_get_pitch(surface);
434     b_info->bmiHeader.biPlanes = 1;
435     b_info->bmiHeader.biBitCount = format->byte_count * 8;
436
437     b_info->bmiHeader.biXPelsPerMeter = 0;
438     b_info->bmiHeader.biYPelsPerMeter = 0;
439     b_info->bmiHeader.biClrUsed = 0;
440     b_info->bmiHeader.biClrImportant = 0;
441
442     /* Get the bit masks */
443     masks = (DWORD *)b_info->bmiColors;
444     switch (surface->resource.format->id)
445     {
446         case WINED3DFMT_B8G8R8_UNORM:
447             b_info->bmiHeader.biCompression = BI_RGB;
448             break;
449
450         case WINED3DFMT_B5G5R5X1_UNORM:
451         case WINED3DFMT_B5G5R5A1_UNORM:
452         case WINED3DFMT_B4G4R4A4_UNORM:
453         case WINED3DFMT_B4G4R4X4_UNORM:
454         case WINED3DFMT_B2G3R3_UNORM:
455         case WINED3DFMT_B2G3R3A8_UNORM:
456         case WINED3DFMT_R10G10B10A2_UNORM:
457         case WINED3DFMT_R8G8B8A8_UNORM:
458         case WINED3DFMT_R8G8B8X8_UNORM:
459         case WINED3DFMT_B10G10R10A2_UNORM:
460         case WINED3DFMT_B5G6R5_UNORM:
461         case WINED3DFMT_R16G16B16A16_UNORM:
462             b_info->bmiHeader.biCompression = BI_BITFIELDS;
463             get_color_masks(format, masks);
464             break;
465
466         default:
467             /* Don't know palette */
468             b_info->bmiHeader.biCompression = BI_RGB;
469             break;
470     }
471
472     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
473             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
474             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
475     surface->dib.DIBsection = CreateDIBSection(0, b_info, DIB_RGB_COLORS, &surface->dib.bitmap_data, 0, 0);
476
477     if (!surface->dib.DIBsection)
478     {
479         ERR("Failed to create DIB section.\n");
480         HeapFree(GetProcessHeap(), 0, b_info);
481         return HRESULT_FROM_WIN32(GetLastError());
482     }
483
484     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
485     /* Copy the existing surface to the dib section. */
486     if (surface->resource.allocatedMemory)
487     {
488         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
489                 surface->resource.height * wined3d_surface_get_pitch(surface));
490     }
491     else
492     {
493         /* This is to make maps read the GL texture although memory is allocated. */
494         surface->flags &= ~SFLAG_INSYSMEM;
495     }
496     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
497
498     HeapFree(GetProcessHeap(), 0, b_info);
499
500     /* Now allocate a DC. */
501     surface->hDC = CreateCompatibleDC(0);
502     SelectObject(surface->hDC, surface->dib.DIBsection);
503     TRACE("Using wined3d palette %p.\n", surface->palette);
504     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
505
506     surface->flags |= SFLAG_DIBSECTION;
507
508     return WINED3D_OK;
509 }
510
511 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
512 {
513     if (surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
514         return FALSE;
515     if (!(surface->flags & SFLAG_DYNLOCK))
516         return FALSE;
517     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
518         return FALSE;
519     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
520         return FALSE;
521
522     return TRUE;
523 }
524
525 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
526 {
527     struct wined3d_context *context;
528     GLenum error;
529
530     context = context_acquire(surface->resource.device, NULL);
531
532     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
533     error = gl_info->gl_ops.gl.p_glGetError();
534     if (!surface->pbo || error != GL_NO_ERROR)
535         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
536
537     TRACE("Binding PBO %u.\n", surface->pbo);
538
539     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
540     checkGLcall("glBindBufferARB");
541
542     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
543             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
544     checkGLcall("glBufferDataARB");
545
546     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
547     checkGLcall("glBindBufferARB");
548
549     /* We don't need the system memory anymore and we can't even use it for PBOs. */
550     if (!(surface->flags & SFLAG_CLIENT))
551     {
552         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
553         surface->resource.heapMemory = NULL;
554     }
555     surface->resource.allocatedMemory = NULL;
556     surface->flags |= SFLAG_PBO;
557     context_release(context);
558 }
559
560 static void surface_prepare_system_memory(struct wined3d_surface *surface)
561 {
562     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
563
564     TRACE("surface %p.\n", surface);
565
566     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
567         surface_load_pbo(surface, gl_info);
568     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
569     {
570         /* Whatever surface we have, make sure that there is memory allocated
571          * for the downloaded copy, or a PBO to map. */
572         if (!surface->resource.heapMemory)
573             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
574
575         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
576                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
577
578         if (surface->flags & SFLAG_INSYSMEM)
579             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
580     }
581 }
582
583 static void surface_evict_sysmem(struct wined3d_surface *surface)
584 {
585     if (surface->resource.map_count || (surface->flags & SFLAG_DONOTFREE))
586         return;
587
588     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
589     surface->resource.allocatedMemory = NULL;
590     surface->resource.heapMemory = NULL;
591     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
592 }
593
594 /* Context activation is done by the caller. */
595 static void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
596 {
597     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
598
599     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
600     {
601         struct wined3d_texture *texture = surface->container.u.texture;
602
603         TRACE("Passing to container (%p).\n", texture);
604         texture->texture_ops->texture_bind(texture, context, srgb);
605     }
606     else
607     {
608         const struct wined3d_gl_info *gl_info = context->gl_info;
609
610         if (surface->texture_level)
611         {
612             ERR("Standalone surface %p is non-zero texture level %u.\n",
613                     surface, surface->texture_level);
614         }
615
616         if (srgb)
617             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
618
619         if (!surface->texture_name)
620         {
621             gl_info->gl_ops.gl.p_glGenTextures(1, &surface->texture_name);
622             checkGLcall("glGenTextures");
623
624             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
625
626             context_bind_texture(context, surface->texture_target, surface->texture_name);
627             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
628             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
629             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
630             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
631             gl_info->gl_ops.gl.p_glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
632             checkGLcall("glTexParameteri");
633         }
634         else
635         {
636             context_bind_texture(context, surface->texture_target, surface->texture_name);
637         }
638     }
639 }
640
641 /* Context activation is done by the caller. */
642 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
643         struct wined3d_context *context, BOOL srgb)
644 {
645     struct wined3d_device *device = surface->resource.device;
646     DWORD active_sampler;
647
648     /* We don't need a specific texture unit, but after binding the texture
649      * the current unit is dirty. Read the unit back instead of switching to
650      * 0, this avoids messing around with the state manager's GL states. The
651      * current texture unit should always be a valid one.
652      *
653      * To be more specific, this is tricky because we can implicitly be
654      * called from sampler() in state.c. This means we can't touch anything
655      * other than whatever happens to be the currently active texture, or we
656      * would risk marking already applied sampler states dirty again. */
657     active_sampler = device->rev_tex_unit_map[context->active_texture];
658
659     if (active_sampler != WINED3D_UNMAPPED_STAGE)
660         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
661     surface_bind(surface, context, srgb);
662 }
663
664 static void surface_force_reload(struct wined3d_surface *surface)
665 {
666     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
667 }
668
669 static void surface_release_client_storage(struct wined3d_surface *surface)
670 {
671     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
672     const struct wined3d_gl_info *gl_info = context->gl_info;
673
674     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
675     if (surface->texture_name)
676     {
677         surface_bind_and_dirtify(surface, context, FALSE);
678         gl_info->gl_ops.gl.p_glTexImage2D(surface->texture_target, surface->texture_level,
679                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
680     }
681     if (surface->texture_name_srgb)
682     {
683         surface_bind_and_dirtify(surface, context, TRUE);
684         gl_info->gl_ops.gl.p_glTexImage2D(surface->texture_target, surface->texture_level,
685                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
686     }
687     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
688
689     context_release(context);
690
691     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
692     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
693     surface_force_reload(surface);
694 }
695
696 static HRESULT surface_private_setup(struct wined3d_surface *surface)
697 {
698     /* TODO: Check against the maximum texture sizes supported by the video card. */
699     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
700     unsigned int pow2Width, pow2Height;
701
702     TRACE("surface %p.\n", surface);
703
704     surface->texture_name = 0;
705     surface->texture_target = GL_TEXTURE_2D;
706
707     /* Non-power2 support */
708     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
709     {
710         pow2Width = surface->resource.width;
711         pow2Height = surface->resource.height;
712     }
713     else
714     {
715         /* Find the nearest pow2 match */
716         pow2Width = pow2Height = 1;
717         while (pow2Width < surface->resource.width)
718             pow2Width <<= 1;
719         while (pow2Height < surface->resource.height)
720             pow2Height <<= 1;
721     }
722     surface->pow2Width = pow2Width;
723     surface->pow2Height = pow2Height;
724
725     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
726     {
727         /* TODO: Add support for non power two compressed textures. */
728         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
729         {
730             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
731                   surface, surface->resource.width, surface->resource.height);
732             return WINED3DERR_NOTAVAILABLE;
733         }
734     }
735
736     if (pow2Width != surface->resource.width
737             || pow2Height != surface->resource.height)
738     {
739         surface->flags |= SFLAG_NONPOW2;
740     }
741
742     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
743             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
744     {
745         /* One of three options:
746          * 1: Do the same as we do with NPOT and scale the texture, (any
747          *    texture ops would require the texture to be scaled which is
748          *    potentially slow)
749          * 2: Set the texture to the maximum size (bad idea).
750          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
751          * 4: Create the surface, but allow it to be used only for DirectDraw
752          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
753          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
754          *    the render target. */
755         if (surface->resource.pool == WINED3D_POOL_DEFAULT || surface->resource.pool == WINED3D_POOL_MANAGED)
756         {
757             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
758             return WINED3DERR_NOTAVAILABLE;
759         }
760
761         /* We should never use this surface in combination with OpenGL! */
762         TRACE("Creating an oversized surface: %ux%u.\n",
763                 surface->pow2Width, surface->pow2Height);
764     }
765     else
766     {
767         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
768          * and EXT_PALETTED_TEXTURE is used in combination with texture
769          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
770          * EXT_PALETTED_TEXTURE doesn't work in combination with
771          * ARB_TEXTURE_RECTANGLE. */
772         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
773                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
774                 && gl_info->supported[EXT_PALETTED_TEXTURE]
775                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
776         {
777             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
778             surface->pow2Width = surface->resource.width;
779             surface->pow2Height = surface->resource.height;
780             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
781         }
782     }
783
784     switch (wined3d_settings.offscreen_rendering_mode)
785     {
786         case ORM_FBO:
787             surface->get_drawable_size = get_drawable_size_fbo;
788             break;
789
790         case ORM_BACKBUFFER:
791             surface->get_drawable_size = get_drawable_size_backbuffer;
792             break;
793
794         default:
795             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
796             return WINED3DERR_INVALIDCALL;
797     }
798
799     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
800         surface->flags |= SFLAG_DISCARDED;
801
802     return WINED3D_OK;
803 }
804
805 static void surface_realize_palette(struct wined3d_surface *surface)
806 {
807     struct wined3d_palette *palette = surface->palette;
808
809     TRACE("surface %p.\n", surface);
810
811     if (!palette) return;
812
813     if (surface->resource.format->id == WINED3DFMT_P8_UINT
814             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
815     {
816         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
817         {
818             /* Make sure the texture is up to date. This call doesn't do
819              * anything if the texture is already up to date. */
820             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
821
822             /* We want to force a palette refresh, so mark the drawable as not being up to date */
823             if (!surface_is_offscreen(surface))
824                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
825         }
826         else
827         {
828             if (!(surface->flags & SFLAG_INSYSMEM))
829             {
830                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
831                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
832             }
833             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
834         }
835     }
836
837     if (surface->flags & SFLAG_DIBSECTION)
838     {
839         RGBQUAD col[256];
840         unsigned int i;
841
842         TRACE("Updating the DC's palette.\n");
843
844         for (i = 0; i < 256; ++i)
845         {
846             col[i].rgbRed   = palette->palents[i].peRed;
847             col[i].rgbGreen = palette->palents[i].peGreen;
848             col[i].rgbBlue  = palette->palents[i].peBlue;
849             col[i].rgbReserved = 0;
850         }
851         SetDIBColorTable(surface->hDC, 0, 256, col);
852     }
853
854     /* Propagate the changes to the drawable when we have a palette. */
855     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
856         surface_load_location(surface, surface->draw_binding, NULL);
857 }
858
859 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
860 {
861     HRESULT hr;
862
863     /* If there's no destination surface there is nothing to do. */
864     if (!surface->overlay_dest)
865         return WINED3D_OK;
866
867     /* Blt calls ModifyLocation on the dest surface, which in turn calls
868      * DrawOverlay to update the overlay. Prevent an endless recursion. */
869     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
870         return WINED3D_OK;
871
872     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
873     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
874             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3D_TEXF_LINEAR);
875     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
876
877     return hr;
878 }
879
880 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
881 {
882     struct wined3d_device *device = surface->resource.device;
883     const RECT *pass_rect = rect;
884
885     TRACE("surface %p, rect %s, flags %#x.\n",
886             surface, wine_dbgstr_rect(rect), flags);
887
888     if (flags & WINED3D_MAP_DISCARD)
889     {
890         TRACE("WINED3D_MAP_DISCARD flag passed, marking SYSMEM as up to date.\n");
891         surface_prepare_system_memory(surface);
892         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
893     }
894     else
895     {
896         /* surface_load_location() does not check if the rectangle specifies
897          * the full surface. Most callers don't need that, so do it here. */
898         if (rect && !rect->top && !rect->left
899                 && rect->right == surface->resource.width
900                 && rect->bottom == surface->resource.height)
901             pass_rect = NULL;
902         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
903     }
904
905     if (surface->flags & SFLAG_PBO)
906     {
907         const struct wined3d_gl_info *gl_info;
908         struct wined3d_context *context;
909
910         context = context_acquire(device, NULL);
911         gl_info = context->gl_info;
912
913         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
914         checkGLcall("glBindBufferARB");
915
916         /* This shouldn't happen but could occur if some other function
917          * didn't handle the PBO properly. */
918         if (surface->resource.allocatedMemory)
919             ERR("The surface already has PBO memory allocated.\n");
920
921         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
922         checkGLcall("glMapBufferARB");
923
924         /* Make sure the PBO isn't set anymore in order not to break non-PBO
925          * calls. */
926         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
927         checkGLcall("glBindBufferARB");
928
929         context_release(context);
930     }
931
932     if (!(flags & (WINED3D_MAP_NO_DIRTY_UPDATE | WINED3D_MAP_READONLY)))
933     {
934         if (!rect)
935             surface_add_dirty_rect(surface, NULL);
936         else
937         {
938             struct wined3d_box b;
939
940             b.left = rect->left;
941             b.top = rect->top;
942             b.right = rect->right;
943             b.bottom = rect->bottom;
944             b.front = 0;
945             b.back = 1;
946             surface_add_dirty_rect(surface, &b);
947         }
948     }
949 }
950
951 static void surface_unmap(struct wined3d_surface *surface)
952 {
953     struct wined3d_device *device = surface->resource.device;
954     BOOL fullsurface;
955
956     TRACE("surface %p.\n", surface);
957
958     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
959
960     if (surface->flags & SFLAG_PBO)
961     {
962         const struct wined3d_gl_info *gl_info;
963         struct wined3d_context *context;
964
965         TRACE("Freeing PBO memory.\n");
966
967         context = context_acquire(device, NULL);
968         gl_info = context->gl_info;
969
970         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
971         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
972         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
973         checkGLcall("glUnmapBufferARB");
974         context_release(context);
975
976         surface->resource.allocatedMemory = NULL;
977     }
978
979     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
980
981     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
982     {
983         TRACE("Not dirtified, nothing to do.\n");
984         goto done;
985     }
986
987     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
988             && surface->container.u.swapchain->front_buffer == surface)
989     {
990         if (!surface->dirtyRect.left && !surface->dirtyRect.top
991                 && surface->dirtyRect.right == surface->resource.width
992                 && surface->dirtyRect.bottom == surface->resource.height)
993         {
994             fullsurface = TRUE;
995         }
996         else
997         {
998             /* TODO: Proper partial rectangle tracking. */
999             fullsurface = FALSE;
1000             surface->flags |= SFLAG_INSYSMEM;
1001         }
1002
1003         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
1004
1005         /* Partial rectangle tracking is not commonly implemented, it is only
1006          * done for render targets. INSYSMEM was set before to tell
1007          * surface_load_location() where to read the rectangle from.
1008          * Indrawable is set because all modifications from the partial
1009          * sysmem copy are written back to the drawable, thus the surface is
1010          * merged again in the drawable. The sysmem copy is not fully up to
1011          * date because only a subrectangle was read in Map(). */
1012         if (!fullsurface)
1013         {
1014             surface_modify_location(surface, surface->draw_binding, TRUE);
1015             surface_evict_sysmem(surface);
1016         }
1017
1018         surface->dirtyRect.left = surface->resource.width;
1019         surface->dirtyRect.top = surface->resource.height;
1020         surface->dirtyRect.right = 0;
1021         surface->dirtyRect.bottom = 0;
1022     }
1023     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1024     {
1025         FIXME("Depth / stencil buffer locking is not implemented.\n");
1026     }
1027
1028 done:
1029     /* Overlays have to be redrawn manually after changes with the GL implementation */
1030     if (surface->overlay_dest)
1031         surface_draw_overlay(surface);
1032 }
1033
1034 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1035 {
1036     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1037         return FALSE;
1038     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1039         return FALSE;
1040     return TRUE;
1041 }
1042
1043 static void surface_depth_blt_fbo(const struct wined3d_device *device,
1044         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect,
1045         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect)
1046 {
1047     const struct wined3d_gl_info *gl_info;
1048     struct wined3d_context *context;
1049     DWORD src_mask, dst_mask;
1050     GLbitfield gl_mask;
1051
1052     TRACE("device %p\n", device);
1053     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1054             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect));
1055     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1056             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect));
1057
1058     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1059     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1060
1061     if (src_mask != dst_mask)
1062     {
1063         ERR("Incompatible formats %s and %s.\n",
1064                 debug_d3dformat(src_surface->resource.format->id),
1065                 debug_d3dformat(dst_surface->resource.format->id));
1066         return;
1067     }
1068
1069     if (!src_mask)
1070     {
1071         ERR("Not a depth / stencil format: %s.\n",
1072                 debug_d3dformat(src_surface->resource.format->id));
1073         return;
1074     }
1075
1076     gl_mask = 0;
1077     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1078         gl_mask |= GL_DEPTH_BUFFER_BIT;
1079     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1080         gl_mask |= GL_STENCIL_BUFFER_BIT;
1081
1082     /* Make sure the locations are up-to-date. Loading the destination
1083      * surface isn't required if the entire surface is overwritten. */
1084     surface_load_location(src_surface, src_location, NULL);
1085     if (!surface_is_full_rect(dst_surface, dst_rect))
1086         surface_load_location(dst_surface, dst_location, NULL);
1087
1088     context = context_acquire(device, NULL);
1089     if (!context->valid)
1090     {
1091         context_release(context);
1092         WARN("Invalid context, skipping blit.\n");
1093         return;
1094     }
1095
1096     gl_info = context->gl_info;
1097
1098     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, src_location);
1099     gl_info->gl_ops.gl.p_glReadBuffer(GL_NONE);
1100     checkGLcall("glReadBuffer()");
1101     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1102
1103     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, dst_location);
1104     context_set_draw_buffer(context, GL_NONE);
1105     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1106     context_invalidate_state(context, STATE_FRAMEBUFFER);
1107
1108     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1109     {
1110         gl_info->gl_ops.gl.p_glDepthMask(GL_TRUE);
1111         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1112     }
1113     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1114     {
1115         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1116         {
1117             gl_info->gl_ops.gl.p_glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1118             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1119         }
1120         gl_info->gl_ops.gl.p_glStencilMask(~0U);
1121         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1122     }
1123
1124     gl_info->gl_ops.gl.p_glDisable(GL_SCISSOR_TEST);
1125     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1126
1127     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1128             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1129     checkGLcall("glBlitFramebuffer()");
1130
1131     if (wined3d_settings.strict_draw_ordering)
1132         gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
1133
1134     context_release(context);
1135 }
1136
1137 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1138  * Depth / stencil is not supported. */
1139 static void surface_blt_fbo(const struct wined3d_device *device, enum wined3d_texture_filter_type filter,
1140         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1141         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1142 {
1143     const struct wined3d_gl_info *gl_info;
1144     struct wined3d_context *context;
1145     RECT src_rect, dst_rect;
1146     GLenum gl_filter;
1147     GLenum buffer;
1148
1149     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1150     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1151             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1152     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1153             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1154
1155     src_rect = *src_rect_in;
1156     dst_rect = *dst_rect_in;
1157
1158     switch (filter)
1159     {
1160         case WINED3D_TEXF_LINEAR:
1161             gl_filter = GL_LINEAR;
1162             break;
1163
1164         default:
1165             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1166         case WINED3D_TEXF_NONE:
1167         case WINED3D_TEXF_POINT:
1168             gl_filter = GL_NEAREST;
1169             break;
1170     }
1171
1172     /* Resolve the source surface first if needed. */
1173     if (src_location == SFLAG_INRB_MULTISAMPLE
1174             && (src_surface->resource.format->id != dst_surface->resource.format->id
1175                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1176                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1177         src_location = SFLAG_INRB_RESOLVED;
1178
1179     /* Make sure the locations are up-to-date. Loading the destination
1180      * surface isn't required if the entire surface is overwritten. (And is
1181      * in fact harmful if we're being called by surface_load_location() with
1182      * the purpose of loading the destination surface.) */
1183     surface_load_location(src_surface, src_location, NULL);
1184     if (!surface_is_full_rect(dst_surface, &dst_rect))
1185         surface_load_location(dst_surface, dst_location, NULL);
1186
1187     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1188     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1189     else context = context_acquire(device, NULL);
1190
1191     if (!context->valid)
1192     {
1193         context_release(context);
1194         WARN("Invalid context, skipping blit.\n");
1195         return;
1196     }
1197
1198     gl_info = context->gl_info;
1199
1200     if (src_location == SFLAG_INDRAWABLE)
1201     {
1202         TRACE("Source surface %p is onscreen.\n", src_surface);
1203         buffer = surface_get_gl_buffer(src_surface);
1204         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1205     }
1206     else
1207     {
1208         TRACE("Source surface %p is offscreen.\n", src_surface);
1209         buffer = GL_COLOR_ATTACHMENT0;
1210     }
1211
1212     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1213     gl_info->gl_ops.gl.p_glReadBuffer(buffer);
1214     checkGLcall("glReadBuffer()");
1215     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1216
1217     if (dst_location == SFLAG_INDRAWABLE)
1218     {
1219         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1220         buffer = surface_get_gl_buffer(dst_surface);
1221         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1222     }
1223     else
1224     {
1225         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1226         buffer = GL_COLOR_ATTACHMENT0;
1227     }
1228
1229     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1230     context_set_draw_buffer(context, buffer);
1231     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1232     context_invalidate_state(context, STATE_FRAMEBUFFER);
1233
1234     gl_info->gl_ops.gl.p_glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1235     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1236     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1237     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1238     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1239
1240     gl_info->gl_ops.gl.p_glDisable(GL_SCISSOR_TEST);
1241     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1242
1243     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1244             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1245     checkGLcall("glBlitFramebuffer()");
1246
1247     if (wined3d_settings.strict_draw_ordering
1248             || (dst_location == SFLAG_INDRAWABLE
1249             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1250         gl_info->gl_ops.gl.p_glFlush();
1251
1252     context_release(context);
1253 }
1254
1255 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1256         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
1257         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
1258 {
1259     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1260         return FALSE;
1261
1262     /* Source and/or destination need to be on the GL side */
1263     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
1264         return FALSE;
1265
1266     switch (blit_op)
1267     {
1268         case WINED3D_BLIT_OP_COLOR_BLIT:
1269             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1270                 return FALSE;
1271             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1272                 return FALSE;
1273             break;
1274
1275         case WINED3D_BLIT_OP_DEPTH_BLIT:
1276             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1277                 return FALSE;
1278             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1279                 return FALSE;
1280             break;
1281
1282         default:
1283             return FALSE;
1284     }
1285
1286     if (!(src_format->id == dst_format->id
1287             || (is_identity_fixup(src_format->color_fixup)
1288             && is_identity_fixup(dst_format->color_fixup))))
1289         return FALSE;
1290
1291     return TRUE;
1292 }
1293
1294 /* This function checks if the primary render target uses the 8bit paletted format. */
1295 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1296 {
1297     if (device->fb.render_targets && device->fb.render_targets[0])
1298     {
1299         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1300         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1301                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1302             return TRUE;
1303     }
1304     return FALSE;
1305 }
1306
1307 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1308         DWORD color, struct wined3d_color *float_color)
1309 {
1310     const struct wined3d_format *format = surface->resource.format;
1311     const struct wined3d_device *device = surface->resource.device;
1312
1313     switch (format->id)
1314     {
1315         case WINED3DFMT_P8_UINT:
1316             if (surface->palette)
1317             {
1318                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1319                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1320                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1321             }
1322             else
1323             {
1324                 float_color->r = 0.0f;
1325                 float_color->g = 0.0f;
1326                 float_color->b = 0.0f;
1327             }
1328             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1329             break;
1330
1331         case WINED3DFMT_B5G6R5_UNORM:
1332             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1333             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1334             float_color->b = (color & 0x1f) / 31.0f;
1335             float_color->a = 1.0f;
1336             break;
1337
1338         case WINED3DFMT_B8G8R8_UNORM:
1339         case WINED3DFMT_B8G8R8X8_UNORM:
1340             float_color->r = D3DCOLOR_R(color);
1341             float_color->g = D3DCOLOR_G(color);
1342             float_color->b = D3DCOLOR_B(color);
1343             float_color->a = 1.0f;
1344             break;
1345
1346         case WINED3DFMT_B8G8R8A8_UNORM:
1347             float_color->r = D3DCOLOR_R(color);
1348             float_color->g = D3DCOLOR_G(color);
1349             float_color->b = D3DCOLOR_B(color);
1350             float_color->a = D3DCOLOR_A(color);
1351             break;
1352
1353         default:
1354             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1355             return FALSE;
1356     }
1357
1358     return TRUE;
1359 }
1360
1361 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1362 {
1363     const struct wined3d_format *format = surface->resource.format;
1364
1365     switch (format->id)
1366     {
1367         case WINED3DFMT_S1_UINT_D15_UNORM:
1368             *float_depth = depth / (float)0x00007fff;
1369             break;
1370
1371         case WINED3DFMT_D16_UNORM:
1372             *float_depth = depth / (float)0x0000ffff;
1373             break;
1374
1375         case WINED3DFMT_D24_UNORM_S8_UINT:
1376         case WINED3DFMT_X8D24_UNORM:
1377             *float_depth = depth / (float)0x00ffffff;
1378             break;
1379
1380         case WINED3DFMT_D32_UNORM:
1381             *float_depth = depth / (float)0xffffffff;
1382             break;
1383
1384         default:
1385             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1386             return FALSE;
1387     }
1388
1389     return TRUE;
1390 }
1391
1392 /* Do not call while under the GL lock. */
1393 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1394 {
1395     const struct wined3d_resource *resource = &surface->resource;
1396     struct wined3d_device *device = resource->device;
1397     const struct blit_shader *blitter;
1398
1399     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1400             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1401     if (!blitter)
1402     {
1403         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1404         return WINED3DERR_INVALIDCALL;
1405     }
1406
1407     return blitter->depth_fill(device, surface, rect, depth);
1408 }
1409
1410 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect,
1411         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect)
1412 {
1413     struct wined3d_device *device = src_surface->resource.device;
1414
1415     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1416             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1417             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1418         return WINED3DERR_INVALIDCALL;
1419
1420     surface_depth_blt_fbo(device, src_surface, src_location, src_rect, dst_surface, dst_location, dst_rect);
1421
1422     surface_modify_ds_location(dst_surface, dst_location,
1423             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1424
1425     return WINED3D_OK;
1426 }
1427
1428 /* Do not call while under the GL lock. */
1429 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1430         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1431         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
1432 {
1433     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1434     struct wined3d_device *device = dst_surface->resource.device;
1435     DWORD src_ds_flags, dst_ds_flags;
1436     RECT src_rect, dst_rect;
1437     BOOL scale, convert;
1438
1439     static const DWORD simple_blit = WINEDDBLT_ASYNC
1440             | WINEDDBLT_COLORFILL
1441             | WINEDDBLT_WAIT
1442             | WINEDDBLT_DEPTHFILL
1443             | WINEDDBLT_DONOTWAIT;
1444
1445     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1446             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1447             flags, fx, debug_d3dtexturefiltertype(filter));
1448     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1449
1450     if (fx)
1451     {
1452         TRACE("dwSize %#x.\n", fx->dwSize);
1453         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1454         TRACE("dwROP %#x.\n", fx->dwROP);
1455         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1456         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1457         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1458         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1459         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1460         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1461         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1462         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1463         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1464         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1465         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1466         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1467         TRACE("dwReserved %#x.\n", fx->dwReserved);
1468         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1469         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1470         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1471         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1472         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1473         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1474                 fx->ddckDestColorkey.color_space_low_value,
1475                 fx->ddckDestColorkey.color_space_high_value);
1476         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1477                 fx->ddckSrcColorkey.color_space_low_value,
1478                 fx->ddckSrcColorkey.color_space_high_value);
1479     }
1480
1481     if (dst_surface->resource.map_count || (src_surface && src_surface->resource.map_count))
1482     {
1483         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1484         return WINEDDERR_SURFACEBUSY;
1485     }
1486
1487     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1488
1489     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1490             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1491             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1492             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1493             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1494     {
1495         WARN("The application gave us a bad destination rectangle.\n");
1496         return WINEDDERR_INVALIDRECT;
1497     }
1498
1499     if (src_surface)
1500     {
1501         surface_get_rect(src_surface, src_rect_in, &src_rect);
1502
1503         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1504                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1505                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1506                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1507                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1508         {
1509             WARN("Application gave us bad source rectangle for Blt.\n");
1510             return WINEDDERR_INVALIDRECT;
1511         }
1512     }
1513     else
1514     {
1515         memset(&src_rect, 0, sizeof(src_rect));
1516     }
1517
1518     if (!fx || !(fx->dwDDFX))
1519         flags &= ~WINEDDBLT_DDFX;
1520
1521     if (flags & WINEDDBLT_WAIT)
1522         flags &= ~WINEDDBLT_WAIT;
1523
1524     if (flags & WINEDDBLT_ASYNC)
1525     {
1526         static unsigned int once;
1527
1528         if (!once++)
1529             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1530         flags &= ~WINEDDBLT_ASYNC;
1531     }
1532
1533     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1534     if (flags & WINEDDBLT_DONOTWAIT)
1535     {
1536         static unsigned int once;
1537
1538         if (!once++)
1539             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1540         flags &= ~WINEDDBLT_DONOTWAIT;
1541     }
1542
1543     if (!device->d3d_initialized)
1544     {
1545         WARN("D3D not initialized, using fallback.\n");
1546         goto cpu;
1547     }
1548
1549     /* We want to avoid invalidating the sysmem location for converted
1550      * surfaces, since otherwise we'd have to convert the data back when
1551      * locking them. */
1552     if (dst_surface->flags & SFLAG_CONVERTED)
1553     {
1554         WARN("Converted surface, using CPU blit.\n");
1555         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1556     }
1557
1558     if (flags & ~simple_blit)
1559     {
1560         WARN("Using fallback for complex blit (%#x).\n", flags);
1561         goto fallback;
1562     }
1563
1564     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1565         src_swapchain = src_surface->container.u.swapchain;
1566     else
1567         src_swapchain = NULL;
1568
1569     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1570         dst_swapchain = dst_surface->container.u.swapchain;
1571     else
1572         dst_swapchain = NULL;
1573
1574     /* This isn't strictly needed. FBO blits for example could deal with
1575      * cross-swapchain blits by first downloading the source to a texture
1576      * before switching to the destination context. We just have this here to
1577      * not have to deal with the issue, since cross-swapchain blits should be
1578      * rare. */
1579     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1580     {
1581         FIXME("Using fallback for cross-swapchain blit.\n");
1582         goto fallback;
1583     }
1584
1585     scale = src_surface
1586             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1587             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1588     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1589
1590     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1591     if (src_surface)
1592         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1593     else
1594         src_ds_flags = 0;
1595
1596     if (src_ds_flags || dst_ds_flags)
1597     {
1598         if (flags & WINEDDBLT_DEPTHFILL)
1599         {
1600             float depth;
1601
1602             TRACE("Depth fill.\n");
1603
1604             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1605                 return WINED3DERR_INVALIDCALL;
1606
1607             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1608                 return WINED3D_OK;
1609         }
1610         else
1611         {
1612             if (src_ds_flags != dst_ds_flags)
1613             {
1614                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1615                 return WINED3DERR_INVALIDCALL;
1616             }
1617
1618             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, src_surface->draw_binding, &src_rect,
1619                     dst_surface, dst_surface->draw_binding, &dst_rect)))
1620                 return WINED3D_OK;
1621         }
1622     }
1623     else
1624     {
1625         /* In principle this would apply to depth blits as well, but we don't
1626          * implement those in the CPU blitter at the moment. */
1627         if ((dst_surface->flags & SFLAG_INSYSMEM)
1628                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1629         {
1630             if (scale)
1631                 TRACE("Not doing sysmem blit because of scaling.\n");
1632             else if (convert)
1633                 TRACE("Not doing sysmem blit because of format conversion.\n");
1634             else
1635                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1636         }
1637
1638         if (flags & WINEDDBLT_COLORFILL)
1639         {
1640             struct wined3d_color color;
1641
1642             TRACE("Color fill.\n");
1643
1644             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1645                 goto fallback;
1646
1647             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1648                 return WINED3D_OK;
1649         }
1650         else
1651         {
1652             TRACE("Color blit.\n");
1653
1654             /* Upload */
1655             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1656             {
1657                 if (scale)
1658                     TRACE("Not doing upload because of scaling.\n");
1659                 else if (convert)
1660                     TRACE("Not doing upload because of format conversion.\n");
1661                 else
1662                 {
1663                     POINT dst_point = {dst_rect.left, dst_rect.top};
1664
1665                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1666                     {
1667                         if (!surface_is_offscreen(dst_surface))
1668                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1669                         return WINED3D_OK;
1670                     }
1671                 }
1672             }
1673
1674             /* Use present for back -> front blits. The idea behind this is
1675              * that present is potentially faster than a blit, in particular
1676              * when FBO blits aren't available. Some ddraw applications like
1677              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1678              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1679              * applications can't blit directly to the frontbuffer. */
1680             if (dst_swapchain && dst_swapchain->back_buffers
1681                     && dst_surface == dst_swapchain->front_buffer
1682                     && src_surface == dst_swapchain->back_buffers[0])
1683             {
1684                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1685
1686                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1687
1688                 /* Set the swap effect to COPY, we don't want the backbuffer
1689                  * to become undefined. */
1690                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1691                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1692                 dst_swapchain->desc.swap_effect = swap_effect;
1693
1694                 return WINED3D_OK;
1695             }
1696
1697             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1698                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1699                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1700             {
1701                 TRACE("Using FBO blit.\n");
1702
1703                 surface_blt_fbo(device, filter,
1704                         src_surface, src_surface->draw_binding, &src_rect,
1705                         dst_surface, dst_surface->draw_binding, &dst_rect);
1706                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1707                 return WINED3D_OK;
1708             }
1709
1710             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1711                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1712                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1713             {
1714                 TRACE("Using arbfp blit.\n");
1715
1716                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1717                     return WINED3D_OK;
1718             }
1719         }
1720     }
1721
1722 fallback:
1723
1724     /* Special cases for render targets. */
1725     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1726             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1727     {
1728         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1729                 src_surface, &src_rect, flags, fx, filter)))
1730             return WINED3D_OK;
1731     }
1732
1733 cpu:
1734
1735     /* For the rest call the X11 surface implementation. For render targets
1736      * this should be implemented OpenGL accelerated in BltOverride, other
1737      * blits are rather rare. */
1738     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1739 }
1740
1741 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1742         struct wined3d_surface *render_target)
1743 {
1744     TRACE("surface %p, render_target %p.\n", surface, render_target);
1745
1746     /* TODO: Check surface sizes, pools, etc. */
1747
1748     if (render_target->resource.multisample_type)
1749         return WINED3DERR_INVALIDCALL;
1750
1751     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3D_TEXF_POINT);
1752 }
1753
1754 /* Context activation is done by the caller. */
1755 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1756 {
1757     if (surface->flags & SFLAG_DIBSECTION)
1758     {
1759         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1760     }
1761     else
1762     {
1763         if (!surface->resource.heapMemory)
1764             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1765         else if (!(surface->flags & SFLAG_CLIENT))
1766             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1767                     surface, surface->resource.heapMemory, surface->flags);
1768
1769         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1770                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1771     }
1772
1773     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1774     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1775     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1776             surface->resource.size, surface->resource.allocatedMemory));
1777     checkGLcall("glGetBufferSubDataARB");
1778     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1779     checkGLcall("glDeleteBuffersARB");
1780
1781     surface->pbo = 0;
1782     surface->flags &= ~SFLAG_PBO;
1783 }
1784
1785 static BOOL surface_init_sysmem(struct wined3d_surface *surface)
1786 {
1787     if (!surface->resource.allocatedMemory)
1788     {
1789         if (!surface->resource.heapMemory)
1790         {
1791             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
1792                     surface->resource.size + RESOURCE_ALIGNMENT)))
1793             {
1794                 ERR("Failed to allocate memory.\n");
1795                 return FALSE;
1796             }
1797         }
1798         else if (!(surface->flags & SFLAG_CLIENT))
1799         {
1800             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1801                     surface, surface->resource.heapMemory, surface->flags);
1802         }
1803
1804         surface->resource.allocatedMemory =
1805             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1806     }
1807     else
1808     {
1809         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
1810     }
1811
1812     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1813
1814     return TRUE;
1815 }
1816
1817 /* Do not call while under the GL lock. */
1818 static void surface_unload(struct wined3d_resource *resource)
1819 {
1820     struct wined3d_surface *surface = surface_from_resource(resource);
1821     struct wined3d_renderbuffer_entry *entry, *entry2;
1822     struct wined3d_device *device = resource->device;
1823     const struct wined3d_gl_info *gl_info;
1824     struct wined3d_context *context;
1825
1826     TRACE("surface %p.\n", surface);
1827
1828     if (resource->pool == WINED3D_POOL_DEFAULT)
1829     {
1830         /* Default pool resources are supposed to be destroyed before Reset is called.
1831          * Implicit resources stay however. So this means we have an implicit render target
1832          * or depth stencil. The content may be destroyed, but we still have to tear down
1833          * opengl resources, so we cannot leave early.
1834          *
1835          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1836          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1837          * or the depth stencil into an FBO the texture or render buffer will be removed
1838          * and all flags get lost
1839          */
1840         if (!(surface->flags & SFLAG_PBO))
1841             surface_init_sysmem(surface);
1842         /* We also get here when the ddraw swapchain is destroyed, for example
1843          * for a mode switch. In this case this surface won't necessarily be
1844          * an implicit surface. We have to mark it lost so that the
1845          * application can restore it after the mode switch. */
1846         surface->flags |= SFLAG_LOST;
1847     }
1848     else
1849     {
1850         /* Load the surface into system memory */
1851         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1852         surface_modify_location(surface, surface->draw_binding, FALSE);
1853     }
1854     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1855     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1856     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1857
1858     context = context_acquire(device, NULL);
1859     gl_info = context->gl_info;
1860
1861     /* Destroy PBOs, but load them into real sysmem before */
1862     if (surface->flags & SFLAG_PBO)
1863         surface_remove_pbo(surface, gl_info);
1864
1865     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1866      * all application-created targets the application has to release the surface
1867      * before calling _Reset
1868      */
1869     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1870     {
1871         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1872         list_remove(&entry->entry);
1873         HeapFree(GetProcessHeap(), 0, entry);
1874     }
1875     list_init(&surface->renderbuffers);
1876     surface->current_renderbuffer = NULL;
1877
1878     /* If we're in a texture, the texture name belongs to the texture.
1879      * Otherwise, destroy it. */
1880     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1881     {
1882         gl_info->gl_ops.gl.p_glDeleteTextures(1, &surface->texture_name);
1883         surface->texture_name = 0;
1884         gl_info->gl_ops.gl.p_glDeleteTextures(1, &surface->texture_name_srgb);
1885         surface->texture_name_srgb = 0;
1886     }
1887     if (surface->rb_multisample)
1888     {
1889         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1890         surface->rb_multisample = 0;
1891     }
1892     if (surface->rb_resolved)
1893     {
1894         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1895         surface->rb_resolved = 0;
1896     }
1897
1898     context_release(context);
1899
1900     resource_unload(resource);
1901 }
1902
1903 static const struct wined3d_resource_ops surface_resource_ops =
1904 {
1905     surface_unload,
1906 };
1907
1908 static const struct wined3d_surface_ops surface_ops =
1909 {
1910     surface_private_setup,
1911     surface_realize_palette,
1912     surface_map,
1913     surface_unmap,
1914 };
1915
1916 /*****************************************************************************
1917  * Initializes the GDI surface, aka creates the DIB section we render to
1918  * The DIB section creation is done by calling GetDC, which will create the
1919  * section and releasing the dc to allow the app to use it. The dib section
1920  * will stay until the surface is released
1921  *
1922  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1923  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1924  * avoid confusion in the shared surface code.
1925  *
1926  * Returns:
1927  *  WINED3D_OK on success
1928  *  The return values of called methods on failure
1929  *
1930  *****************************************************************************/
1931 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1932 {
1933     HRESULT hr;
1934
1935     TRACE("surface %p.\n", surface);
1936
1937     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1938     {
1939         ERR("Overlays not yet supported by GDI surfaces.\n");
1940         return WINED3DERR_INVALIDCALL;
1941     }
1942
1943     /* Sysmem textures have memory already allocated - release it,
1944      * this avoids an unnecessary memcpy. */
1945     hr = surface_create_dib_section(surface);
1946     if (SUCCEEDED(hr))
1947     {
1948         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1949         surface->resource.heapMemory = NULL;
1950         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1951     }
1952
1953     /* We don't mind the nonpow2 stuff in GDI. */
1954     surface->pow2Width = surface->resource.width;
1955     surface->pow2Height = surface->resource.height;
1956
1957     return WINED3D_OK;
1958 }
1959
1960 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1961 {
1962     struct wined3d_palette *palette = surface->palette;
1963
1964     TRACE("surface %p.\n", surface);
1965
1966     if (!palette) return;
1967
1968     if (surface->flags & SFLAG_DIBSECTION)
1969     {
1970         RGBQUAD col[256];
1971         unsigned int i;
1972
1973         TRACE("Updating the DC's palette.\n");
1974
1975         for (i = 0; i < 256; ++i)
1976         {
1977             col[i].rgbRed = palette->palents[i].peRed;
1978             col[i].rgbGreen = palette->palents[i].peGreen;
1979             col[i].rgbBlue = palette->palents[i].peBlue;
1980             col[i].rgbReserved = 0;
1981         }
1982         SetDIBColorTable(surface->hDC, 0, 256, col);
1983     }
1984
1985     /* Update the image because of the palette change. Some games like e.g.
1986      * Red Alert call SetEntries a lot to implement fading. */
1987     /* Tell the swapchain to update the screen. */
1988     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1989     {
1990         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1991         if (surface == swapchain->front_buffer)
1992         {
1993             x11_copy_to_screen(swapchain, NULL);
1994         }
1995     }
1996 }
1997
1998 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1999 {
2000     TRACE("surface %p, rect %s, flags %#x.\n",
2001             surface, wine_dbgstr_rect(rect), flags);
2002
2003     if (!(surface->flags & SFLAG_DIBSECTION))
2004     {
2005         HRESULT hr;
2006
2007         /* This happens on gdi surfaces if the application set a user pointer
2008          * and resets it. Recreate the DIB section. */
2009         if (FAILED(hr = surface_create_dib_section(surface)))
2010         {
2011             ERR("Failed to create dib section, hr %#x.\n", hr);
2012             return;
2013         }
2014         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
2015         surface->resource.heapMemory = NULL;
2016         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2017     }
2018 }
2019
2020 static void gdi_surface_unmap(struct wined3d_surface *surface)
2021 {
2022     TRACE("surface %p.\n", surface);
2023
2024     /* Tell the swapchain to update the screen. */
2025     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2026     {
2027         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2028         if (surface == swapchain->front_buffer)
2029         {
2030             x11_copy_to_screen(swapchain, &surface->lockedRect);
2031         }
2032     }
2033
2034     memset(&surface->lockedRect, 0, sizeof(RECT));
2035 }
2036
2037 static const struct wined3d_surface_ops gdi_surface_ops =
2038 {
2039     gdi_surface_private_setup,
2040     gdi_surface_realize_palette,
2041     gdi_surface_map,
2042     gdi_surface_unmap,
2043 };
2044
2045 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2046 {
2047     GLuint *name;
2048     DWORD flag;
2049
2050     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2051
2052     if(srgb)
2053     {
2054         name = &surface->texture_name_srgb;
2055         flag = SFLAG_INSRGBTEX;
2056     }
2057     else
2058     {
2059         name = &surface->texture_name;
2060         flag = SFLAG_INTEXTURE;
2061     }
2062
2063     if (!*name && new_name)
2064     {
2065         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2066          * surface has no texture name yet. See if we can get rid of this. */
2067         if (surface->flags & flag)
2068         {
2069             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2070             surface_modify_location(surface, flag, FALSE);
2071         }
2072     }
2073
2074     *name = new_name;
2075     surface_force_reload(surface);
2076 }
2077
2078 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target, GLint level)
2079 {
2080     TRACE("surface %p, target %#x.\n", surface, target);
2081
2082     if (surface->texture_target != target)
2083     {
2084         if (target == GL_TEXTURE_RECTANGLE_ARB)
2085         {
2086             surface->flags &= ~SFLAG_NORMCOORD;
2087         }
2088         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2089         {
2090             surface->flags |= SFLAG_NORMCOORD;
2091         }
2092     }
2093     surface->texture_target = target;
2094     surface->texture_level = level;
2095     surface_force_reload(surface);
2096 }
2097
2098 /* This call just downloads data, the caller is responsible for binding the
2099  * correct texture. */
2100 /* Context activation is done by the caller. */
2101 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2102 {
2103     const struct wined3d_format *format = surface->resource.format;
2104
2105     /* Only support read back of converted P8 surfaces. */
2106     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2107     {
2108         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2109         return;
2110     }
2111
2112     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2113     {
2114         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2115                 surface, surface->texture_level, format->glFormat, format->glType,
2116                 surface->resource.allocatedMemory);
2117
2118         if (surface->flags & SFLAG_PBO)
2119         {
2120             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2121             checkGLcall("glBindBufferARB");
2122             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2123             checkGLcall("glGetCompressedTexImageARB");
2124             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2125             checkGLcall("glBindBufferARB");
2126         }
2127         else
2128         {
2129             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2130                     surface->texture_level, surface->resource.allocatedMemory));
2131             checkGLcall("glGetCompressedTexImageARB");
2132         }
2133     }
2134     else
2135     {
2136         void *mem;
2137         GLenum gl_format = format->glFormat;
2138         GLenum gl_type = format->glType;
2139         int src_pitch = 0;
2140         int dst_pitch = 0;
2141
2142         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2143         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2144         {
2145             gl_format = GL_ALPHA;
2146             gl_type = GL_UNSIGNED_BYTE;
2147         }
2148
2149         if (surface->flags & SFLAG_NONPOW2)
2150         {
2151             unsigned char alignment = surface->resource.device->surface_alignment;
2152             src_pitch = format->byte_count * surface->pow2Width;
2153             dst_pitch = wined3d_surface_get_pitch(surface);
2154             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2155             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2156         }
2157         else
2158         {
2159             mem = surface->resource.allocatedMemory;
2160         }
2161
2162         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2163                 surface, surface->texture_level, gl_format, gl_type, mem);
2164
2165         if (surface->flags & SFLAG_PBO)
2166         {
2167             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2168             checkGLcall("glBindBufferARB");
2169
2170             gl_info->gl_ops.gl.p_glGetTexImage(surface->texture_target, surface->texture_level,
2171                     gl_format, gl_type, NULL);
2172             checkGLcall("glGetTexImage");
2173
2174             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2175             checkGLcall("glBindBufferARB");
2176         }
2177         else
2178         {
2179             gl_info->gl_ops.gl.p_glGetTexImage(surface->texture_target, surface->texture_level,
2180                     gl_format, gl_type, mem);
2181             checkGLcall("glGetTexImage");
2182         }
2183
2184         if (surface->flags & SFLAG_NONPOW2)
2185         {
2186             const BYTE *src_data;
2187             BYTE *dst_data;
2188             UINT y;
2189             /*
2190              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2191              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2192              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2193              *
2194              * We're doing this...
2195              *
2196              * instead of boxing the texture :
2197              * |<-texture width ->|  -->pow2width|   /\
2198              * |111111111111111111|              |   |
2199              * |222 Texture 222222| boxed empty  | texture height
2200              * |3333 Data 33333333|              |   |
2201              * |444444444444444444|              |   \/
2202              * -----------------------------------   |
2203              * |     boxed  empty | boxed empty  | pow2height
2204              * |                  |              |   \/
2205              * -----------------------------------
2206              *
2207              *
2208              * we're repacking the data to the expected texture width
2209              *
2210              * |<-texture width ->|  -->pow2width|   /\
2211              * |111111111111111111222222222222222|   |
2212              * |222333333333333333333444444444444| texture height
2213              * |444444                           |   |
2214              * |                                 |   \/
2215              * |                                 |   |
2216              * |            empty                | pow2height
2217              * |                                 |   \/
2218              * -----------------------------------
2219              *
2220              * == is the same as
2221              *
2222              * |<-texture width ->|    /\
2223              * |111111111111111111|
2224              * |222222222222222222|texture height
2225              * |333333333333333333|
2226              * |444444444444444444|    \/
2227              * --------------------
2228              *
2229              * this also means that any references to allocatedMemory should work with the data as if were a
2230              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2231              *
2232              * internally the texture is still stored in a boxed format so any references to textureName will
2233              * get a boxed texture with width pow2width and not a texture of width resource.width.
2234              *
2235              * Performance should not be an issue, because applications normally do not lock the surfaces when
2236              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2237              * and doesn't have to be re-read. */
2238             src_data = mem;
2239             dst_data = surface->resource.allocatedMemory;
2240             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2241             for (y = 1; y < surface->resource.height; ++y)
2242             {
2243                 /* skip the first row */
2244                 src_data += src_pitch;
2245                 dst_data += dst_pitch;
2246                 memcpy(dst_data, src_data, dst_pitch);
2247             }
2248
2249             HeapFree(GetProcessHeap(), 0, mem);
2250         }
2251     }
2252
2253     /* Surface has now been downloaded */
2254     surface->flags |= SFLAG_INSYSMEM;
2255 }
2256
2257 /* This call just uploads data, the caller is responsible for binding the
2258  * correct texture. */
2259 /* Context activation is done by the caller. */
2260 static void surface_upload_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2261         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2262         BOOL srgb, const struct wined3d_bo_address *data)
2263 {
2264     UINT update_w = src_rect->right - src_rect->left;
2265     UINT update_h = src_rect->bottom - src_rect->top;
2266
2267     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2268             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2269             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2270
2271     if (surface->resource.map_count)
2272     {
2273         WARN("Uploading a surface that is currently mapped, setting SFLAG_PIN_SYSMEM.\n");
2274         surface->flags |= SFLAG_PIN_SYSMEM;
2275     }
2276
2277     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2278     {
2279         update_h *= format->height_scale.numerator;
2280         update_h /= format->height_scale.denominator;
2281     }
2282
2283     if (data->buffer_object)
2284     {
2285         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2286         checkGLcall("glBindBufferARB");
2287     }
2288
2289     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2290     {
2291         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2292         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2293         const BYTE *addr = data->addr;
2294         GLenum internal;
2295
2296         addr += (src_rect->top / format->block_height) * src_pitch;
2297         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2298
2299         if (srgb)
2300             internal = format->glGammaInternal;
2301         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2302             internal = format->rtInternal;
2303         else
2304             internal = format->glInternal;
2305
2306         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2307                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2308                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2309
2310         if (row_length == src_pitch)
2311         {
2312             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2313                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2314         }
2315         else
2316         {
2317             UINT row, y;
2318
2319             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2320              * can't use the unpack row length like below. */
2321             for (row = 0, y = dst_point->y; row < row_count; ++row)
2322             {
2323                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2324                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2325                 y += format->block_height;
2326                 addr += src_pitch;
2327             }
2328         }
2329         checkGLcall("glCompressedTexSubImage2DARB");
2330     }
2331     else
2332     {
2333         const BYTE *addr = data->addr;
2334
2335         addr += src_rect->top * src_pitch;
2336         addr += src_rect->left * format->byte_count;
2337
2338         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2339                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2340                 update_w, update_h, format->glFormat, format->glType, addr);
2341
2342         gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2343         gl_info->gl_ops.gl.p_glTexSubImage2D(surface->texture_target, surface->texture_level,
2344                 dst_point->x, dst_point->y, update_w, update_h, format->glFormat, format->glType, addr);
2345         gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2346         checkGLcall("glTexSubImage2D");
2347     }
2348
2349     if (data->buffer_object)
2350     {
2351         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2352         checkGLcall("glBindBufferARB");
2353     }
2354
2355     if (wined3d_settings.strict_draw_ordering)
2356         gl_info->gl_ops.gl.p_glFlush();
2357
2358     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2359     {
2360         struct wined3d_device *device = surface->resource.device;
2361         unsigned int i;
2362
2363         for (i = 0; i < device->context_count; ++i)
2364         {
2365             context_surface_update(device->contexts[i], surface);
2366         }
2367     }
2368 }
2369
2370 static HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck, BOOL use_texturing,
2371         struct wined3d_format *format, enum wined3d_conversion_type *conversion_type)
2372 {
2373     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2374     const struct wined3d_device *device = surface->resource.device;
2375     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
2376     BOOL blit_supported = FALSE;
2377
2378     /* Copy the default values from the surface. Below we might perform fixups */
2379     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
2380     *format = *surface->resource.format;
2381     *conversion_type = WINED3D_CT_NONE;
2382
2383     /* Ok, now look if we have to do any conversion */
2384     switch (surface->resource.format->id)
2385     {
2386         case WINED3DFMT_P8_UINT:
2387             /* Below the call to blit_supported is disabled for Wine 1.2
2388              * because the function isn't operating correctly yet. At the
2389              * moment 8-bit blits are handled in software and if certain GL
2390              * extensions are around, surface conversion is performed at
2391              * upload time. The blit_supported call recognizes it as a
2392              * destination fixup. This type of upload 'fixup' and 8-bit to
2393              * 8-bit blits need to be handled by the blit_shader.
2394              * TODO: get rid of this #if 0. */
2395 #if 0
2396             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
2397                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
2398                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
2399 #endif
2400             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
2401
2402             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
2403              * texturing. Further also use conversion in case of color keying.
2404              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
2405              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
2406              * conflicts with this.
2407              */
2408             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
2409                     || colorkey_active || !use_texturing)
2410             {
2411                 format->glFormat = GL_RGBA;
2412                 format->glInternal = GL_RGBA;
2413                 format->glType = GL_UNSIGNED_BYTE;
2414                 format->conv_byte_count = 4;
2415                 if (colorkey_active)
2416                     *conversion_type = WINED3D_CT_PALETTED_CK;
2417                 else
2418                     *conversion_type = WINED3D_CT_PALETTED;
2419             }
2420             break;
2421
2422         case WINED3DFMT_B2G3R3_UNORM:
2423             /* **********************
2424                 GL_UNSIGNED_BYTE_3_3_2
2425                 ********************** */
2426             if (colorkey_active) {
2427                 /* This texture format will never be used.. So do not care about color keying
2428                     up until the point in time it will be needed :-) */
2429                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
2430             }
2431             break;
2432
2433         case WINED3DFMT_B5G6R5_UNORM:
2434             if (colorkey_active)
2435             {
2436                 *conversion_type = WINED3D_CT_CK_565;
2437                 format->glFormat = GL_RGBA;
2438                 format->glInternal = GL_RGB5_A1;
2439                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
2440                 format->conv_byte_count = 2;
2441             }
2442             break;
2443
2444         case WINED3DFMT_B5G5R5X1_UNORM:
2445             if (colorkey_active)
2446             {
2447                 *conversion_type = WINED3D_CT_CK_5551;
2448                 format->glFormat = GL_BGRA;
2449                 format->glInternal = GL_RGB5_A1;
2450                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
2451                 format->conv_byte_count = 2;
2452             }
2453             break;
2454
2455         case WINED3DFMT_B8G8R8_UNORM:
2456             if (colorkey_active)
2457             {
2458                 *conversion_type = WINED3D_CT_CK_RGB24;
2459                 format->glFormat = GL_RGBA;
2460                 format->glInternal = GL_RGBA8;
2461                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
2462                 format->conv_byte_count = 4;
2463             }
2464             break;
2465
2466         case WINED3DFMT_B8G8R8X8_UNORM:
2467             if (colorkey_active)
2468             {
2469                 *conversion_type = WINED3D_CT_RGB32_888;
2470                 format->glFormat = GL_RGBA;
2471                 format->glInternal = GL_RGBA8;
2472                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
2473                 format->conv_byte_count = 4;
2474             }
2475             break;
2476
2477         case WINED3DFMT_B8G8R8A8_UNORM:
2478             if (colorkey_active)
2479             {
2480                 *conversion_type = WINED3D_CT_CK_ARGB32;
2481                 format->conv_byte_count = 4;
2482             }
2483             break;
2484
2485         default:
2486             break;
2487     }
2488
2489     if (*conversion_type != WINED3D_CT_NONE)
2490     {
2491         format->rtInternal = format->glInternal;
2492         format->glGammaInternal = format->glInternal;
2493     }
2494
2495     return WINED3D_OK;
2496 }
2497
2498 static BOOL surface_check_block_align(struct wined3d_surface *surface, const RECT *rect)
2499 {
2500     UINT width_mask, height_mask;
2501
2502     if (!rect->left && !rect->top
2503             && rect->right == surface->resource.width
2504             && rect->bottom == surface->resource.height)
2505         return TRUE;
2506
2507     /* This assumes power of two block sizes, but NPOT block sizes would be
2508      * silly anyway. */
2509     width_mask = surface->resource.format->block_width - 1;
2510     height_mask = surface->resource.format->block_height - 1;
2511
2512     if (!(rect->left & width_mask) && !(rect->top & height_mask)
2513             && !(rect->right & width_mask) && !(rect->bottom & height_mask))
2514         return TRUE;
2515
2516     return FALSE;
2517 }
2518
2519 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2520         struct wined3d_surface *src_surface, const RECT *src_rect)
2521 {
2522     const struct wined3d_format *src_format;
2523     const struct wined3d_format *dst_format;
2524     const struct wined3d_gl_info *gl_info;
2525     enum wined3d_conversion_type convert;
2526     struct wined3d_context *context;
2527     struct wined3d_bo_address data;
2528     struct wined3d_format format;
2529     UINT update_w, update_h;
2530     UINT dst_w, dst_h;
2531     RECT r, dst_rect;
2532     UINT src_pitch;
2533     POINT p;
2534
2535     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2536             dst_surface, wine_dbgstr_point(dst_point),
2537             src_surface, wine_dbgstr_rect(src_rect));
2538
2539     src_format = src_surface->resource.format;
2540     dst_format = dst_surface->resource.format;
2541
2542     if (src_format->id != dst_format->id)
2543     {
2544         WARN("Source and destination surfaces should have the same format.\n");
2545         return WINED3DERR_INVALIDCALL;
2546     }
2547
2548     if (!dst_point)
2549     {
2550         p.x = 0;
2551         p.y = 0;
2552         dst_point = &p;
2553     }
2554     else if (dst_point->x < 0 || dst_point->y < 0)
2555     {
2556         WARN("Invalid destination point.\n");
2557         return WINED3DERR_INVALIDCALL;
2558     }
2559
2560     if (!src_rect)
2561     {
2562         r.left = 0;
2563         r.top = 0;
2564         r.right = src_surface->resource.width;
2565         r.bottom = src_surface->resource.height;
2566         src_rect = &r;
2567     }
2568     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2569             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2570     {
2571         WARN("Invalid source rectangle.\n");
2572         return WINED3DERR_INVALIDCALL;
2573     }
2574
2575     dst_w = dst_surface->resource.width;
2576     dst_h = dst_surface->resource.height;
2577
2578     update_w = src_rect->right - src_rect->left;
2579     update_h = src_rect->bottom - src_rect->top;
2580
2581     if (update_w > dst_w || dst_point->x > dst_w - update_w
2582             || update_h > dst_h || dst_point->y > dst_h - update_h)
2583     {
2584         WARN("Destination out of bounds.\n");
2585         return WINED3DERR_INVALIDCALL;
2586     }
2587
2588     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS) && !surface_check_block_align(src_surface, src_rect))
2589     {
2590         WARN("Source rectangle not block-aligned.\n");
2591         return WINED3DERR_INVALIDCALL;
2592     }
2593
2594     SetRect(&dst_rect, dst_point->x, dst_point->y, dst_point->x + update_w, dst_point->y + update_h);
2595     if ((dst_format->flags & WINED3DFMT_FLAG_BLOCKS) && !surface_check_block_align(dst_surface, &dst_rect))
2596     {
2597         WARN("Destination rectangle not block-aligned.\n");
2598         return WINED3DERR_INVALIDCALL;
2599     }
2600
2601     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2602     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2603     if (convert != WINED3D_CT_NONE || format.convert)
2604         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3D_TEXF_POINT);
2605
2606     context = context_acquire(dst_surface->resource.device, NULL);
2607     gl_info = context->gl_info;
2608
2609     /* Only load the surface for partial updates. For newly allocated texture
2610      * the texture wouldn't be the current location, and we'd upload zeroes
2611      * just to overwrite them again. */
2612     if (update_w == dst_w && update_h == dst_h)
2613         surface_prepare_texture(dst_surface, context, FALSE);
2614     else
2615         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2616     surface_bind(dst_surface, context, FALSE);
2617
2618     data.buffer_object = src_surface->pbo;
2619     data.addr = src_surface->resource.allocatedMemory;
2620     src_pitch = wined3d_surface_get_pitch(src_surface);
2621
2622     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2623
2624     invalidate_active_texture(dst_surface->resource.device, context);
2625
2626     context_release(context);
2627
2628     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2629     return WINED3D_OK;
2630 }
2631
2632 /* This call just allocates the texture, the caller is responsible for binding
2633  * the correct texture. */
2634 /* Context activation is done by the caller. */
2635 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2636         const struct wined3d_format *format, BOOL srgb)
2637 {
2638     BOOL enable_client_storage = FALSE;
2639     GLsizei width = surface->pow2Width;
2640     GLsizei height = surface->pow2Height;
2641     const BYTE *mem = NULL;
2642     GLenum internal;
2643
2644     if (srgb)
2645     {
2646         internal = format->glGammaInternal;
2647     }
2648     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2649     {
2650         internal = format->rtInternal;
2651     }
2652     else
2653     {
2654         internal = format->glInternal;
2655     }
2656
2657     if (!internal)
2658         FIXME("No GL internal format for format %s.\n", debug_d3dformat(format->id));
2659
2660     if (format->flags & WINED3DFMT_FLAG_HEIGHT_SCALE)
2661     {
2662         height *= format->height_scale.numerator;
2663         height /= format->height_scale.denominator;
2664     }
2665
2666     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2667             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2668             internal, width, height, format->glFormat, format->glType);
2669
2670     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2671     {
2672         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2673                 || !surface->resource.allocatedMemory)
2674         {
2675             /* In some cases we want to disable client storage.
2676              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2677              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2678              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2679              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2680              */
2681             gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2682             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2683             surface->flags &= ~SFLAG_CLIENT;
2684             enable_client_storage = TRUE;
2685         }
2686         else
2687         {
2688             surface->flags |= SFLAG_CLIENT;
2689
2690             /* Point OpenGL to our allocated texture memory. Do not use
2691              * resource.allocatedMemory here because it might point into a
2692              * PBO. Instead use heapMemory, but get the alignment right. */
2693             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2694                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2695         }
2696     }
2697
2698     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2699     {
2700         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2701                 internal, width, height, 0, surface->resource.size, mem));
2702         checkGLcall("glCompressedTexImage2DARB");
2703     }
2704     else
2705     {
2706         gl_info->gl_ops.gl.p_glTexImage2D(surface->texture_target, surface->texture_level,
2707                 internal, width, height, 0, format->glFormat, format->glType, mem);
2708         checkGLcall("glTexImage2D");
2709     }
2710
2711     if (enable_client_storage)
2712     {
2713         gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2714         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2715     }
2716 }
2717
2718 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2719  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2720 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2721 /* Context activation is done by the caller. */
2722 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2723 {
2724     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2725     struct wined3d_renderbuffer_entry *entry;
2726     GLuint renderbuffer = 0;
2727     unsigned int src_width, src_height;
2728     unsigned int width, height;
2729
2730     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2731     {
2732         width = rt->pow2Width;
2733         height = rt->pow2Height;
2734     }
2735     else
2736     {
2737         width = surface->pow2Width;
2738         height = surface->pow2Height;
2739     }
2740
2741     src_width = surface->pow2Width;
2742     src_height = surface->pow2Height;
2743
2744     /* A depth stencil smaller than the render target is not valid */
2745     if (width > src_width || height > src_height) return;
2746
2747     /* Remove any renderbuffer set if the sizes match */
2748     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2749             || (width == src_width && height == src_height))
2750     {
2751         surface->current_renderbuffer = NULL;
2752         return;
2753     }
2754
2755     /* Look if we've already got a renderbuffer of the correct dimensions */
2756     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2757     {
2758         if (entry->width == width && entry->height == height)
2759         {
2760             renderbuffer = entry->id;
2761             surface->current_renderbuffer = entry;
2762             break;
2763         }
2764     }
2765
2766     if (!renderbuffer)
2767     {
2768         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2769         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2770         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2771                 surface->resource.format->glInternal, width, height);
2772
2773         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2774         entry->width = width;
2775         entry->height = height;
2776         entry->id = renderbuffer;
2777         list_add_head(&surface->renderbuffers, &entry->entry);
2778
2779         surface->current_renderbuffer = entry;
2780     }
2781
2782     checkGLcall("set_compatible_renderbuffer");
2783 }
2784
2785 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2786 {
2787     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2788
2789     TRACE("surface %p.\n", surface);
2790
2791     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2792     {
2793         ERR("Surface %p is not on a swapchain.\n", surface);
2794         return GL_NONE;
2795     }
2796
2797     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2798     {
2799         if (swapchain->render_to_fbo)
2800         {
2801             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2802             return GL_COLOR_ATTACHMENT0;
2803         }
2804         TRACE("Returning GL_BACK\n");
2805         return GL_BACK;
2806     }
2807     else if (surface == swapchain->front_buffer)
2808     {
2809         TRACE("Returning GL_FRONT\n");
2810         return GL_FRONT;
2811     }
2812
2813     FIXME("Higher back buffer, returning GL_BACK\n");
2814     return GL_BACK;
2815 }
2816
2817 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2818 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2819 {
2820     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2821
2822     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2823         /* No partial locking for textures yet. */
2824         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2825
2826     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2827     if (dirty_rect)
2828     {
2829         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2830         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2831         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2832         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2833     }
2834     else
2835     {
2836         surface->dirtyRect.left = 0;
2837         surface->dirtyRect.top = 0;
2838         surface->dirtyRect.right = surface->resource.width;
2839         surface->dirtyRect.bottom = surface->resource.height;
2840     }
2841
2842     /* if the container is a texture then mark it dirty. */
2843     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2844     {
2845         TRACE("Passing to container.\n");
2846         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2847     }
2848 }
2849
2850 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2851 {
2852     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2853     BOOL ck_changed;
2854
2855     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2856
2857     if (surface->resource.pool == WINED3D_POOL_SCRATCH)
2858     {
2859         ERR("Not supported on scratch surfaces.\n");
2860         return WINED3DERR_INVALIDCALL;
2861     }
2862
2863     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2864
2865     /* Reload if either the texture and sysmem have different ideas about the
2866      * color key, or the actual key values changed. */
2867     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2868             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2869             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2870     {
2871         TRACE("Reloading because of color keying\n");
2872         /* To perform the color key conversion we need a sysmem copy of
2873          * the surface. Make sure we have it. */
2874
2875         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2876         /* Make sure the texture is reloaded because of the color key change,
2877          * this kills performance though :( */
2878         /* TODO: This is not necessarily needed with hw palettized texture support. */
2879         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2880         /* Switching color keying on / off may change the internal format. */
2881         if (ck_changed)
2882             surface_force_reload(surface);
2883     }
2884     else if (!(surface->flags & flag))
2885     {
2886         TRACE("Reloading because surface is dirty.\n");
2887     }
2888     else
2889     {
2890         TRACE("surface is already in texture\n");
2891         return WINED3D_OK;
2892     }
2893
2894     /* No partial locking for textures yet. */
2895     surface_load_location(surface, flag, NULL);
2896     surface_evict_sysmem(surface);
2897
2898     return WINED3D_OK;
2899 }
2900
/* See also float_16_to_32() in wined3d_private.h */
/* Convert a 32 bit float to a 16 bit half float bit pattern (1 sign bit,
 * 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * Zero (of either sign) gives 0x0000, NaN gives 0x7c01, infinities keep
 * their sign, and values too large for a half float become infinity. The
 * mantissa is rounded to nearest, away from zero. */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The value is scaled into [2^10, 2^11), so that its integer part is the
     * 10 bit mantissa plus the implicit leading one. */
    static const float mantissa_low = 1024.0f;
    static const float mantissa_high = 2048.0f;
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    if (tmp < mantissa_low)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < mantissa_low);
    }
    else if (tmp >= mantissa_high)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= mantissa_high);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 mantissa bits. Renormalize in that
     * case, otherwise the carry would be masked away below and the result
     * would be half of the correct value. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2963
2964 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2965 {
2966     ULONG refcount;
2967
2968     TRACE("Surface %p, container %p of type %#x.\n",
2969             surface, surface->container.u.base, surface->container.type);
2970
2971     switch (surface->container.type)
2972     {
2973         case WINED3D_CONTAINER_TEXTURE:
2974             return wined3d_texture_incref(surface->container.u.texture);
2975
2976         case WINED3D_CONTAINER_SWAPCHAIN:
2977             return wined3d_swapchain_incref(surface->container.u.swapchain);
2978
2979         default:
2980             ERR("Unhandled container type %#x.\n", surface->container.type);
2981         case WINED3D_CONTAINER_NONE:
2982             break;
2983     }
2984
2985     refcount = InterlockedIncrement(&surface->resource.ref);
2986     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2987
2988     return refcount;
2989 }
2990
2991 /* Do not call while under the GL lock. */
2992 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2993 {
2994     ULONG refcount;
2995
2996     TRACE("Surface %p, container %p of type %#x.\n",
2997             surface, surface->container.u.base, surface->container.type);
2998
2999     switch (surface->container.type)
3000     {
3001         case WINED3D_CONTAINER_TEXTURE:
3002             return wined3d_texture_decref(surface->container.u.texture);
3003
3004         case WINED3D_CONTAINER_SWAPCHAIN:
3005             return wined3d_swapchain_decref(surface->container.u.swapchain);
3006
3007         default:
3008             ERR("Unhandled container type %#x.\n", surface->container.type);
3009         case WINED3D_CONTAINER_NONE:
3010             break;
3011     }
3012
3013     refcount = InterlockedDecrement(&surface->resource.ref);
3014     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
3015
3016     if (!refcount)
3017     {
3018         surface_cleanup(surface);
3019         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
3020
3021         TRACE("Destroyed surface %p.\n", surface);
3022         HeapFree(GetProcessHeap(), 0, surface);
3023     }
3024
3025     return refcount;
3026 }
3027
3028 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
3029 {
3030     return resource_set_priority(&surface->resource, priority);
3031 }
3032
3033 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
3034 {
3035     return resource_get_priority(&surface->resource);
3036 }
3037
3038 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
3039 {
3040     TRACE("surface %p.\n", surface);
3041
3042     if (!surface->resource.device->d3d_initialized)
3043     {
3044         ERR("D3D not initialized.\n");
3045         return;
3046     }
3047
3048     surface_internal_preload(surface, SRGB_ANY);
3049 }
3050
3051 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
3052 {
3053     TRACE("surface %p.\n", surface);
3054
3055     return surface->resource.parent;
3056 }
3057
3058 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
3059 {
3060     TRACE("surface %p.\n", surface);
3061
3062     return &surface->resource;
3063 }
3064
3065 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
3066 {
3067     TRACE("surface %p, flags %#x.\n", surface, flags);
3068
3069     switch (flags)
3070     {
3071         case WINEDDGBS_CANBLT:
3072         case WINEDDGBS_ISBLTDONE:
3073             return WINED3D_OK;
3074
3075         default:
3076             return WINED3DERR_INVALIDCALL;
3077     }
3078 }
3079
3080 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
3081 {
3082     TRACE("surface %p, flags %#x.\n", surface, flags);
3083
3084     /* XXX: DDERR_INVALIDSURFACETYPE */
3085
3086     switch (flags)
3087     {
3088         case WINEDDGFS_CANFLIP:
3089         case WINEDDGFS_ISFLIPDONE:
3090             return WINED3D_OK;
3091
3092         default:
3093             return WINED3DERR_INVALIDCALL;
3094     }
3095 }
3096
3097 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
3098 {
3099     TRACE("surface %p.\n", surface);
3100
3101     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
3102     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
3103 }
3104
3105 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
3106 {
3107     TRACE("surface %p.\n", surface);
3108
3109     surface->flags &= ~SFLAG_LOST;
3110     return WINED3D_OK;
3111 }
3112
3113 void CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
3114 {
3115     TRACE("surface %p, palette %p.\n", surface, palette);
3116
3117     if (surface->palette == palette)
3118     {
3119         TRACE("Nop palette change.\n");
3120         return;
3121     }
3122
3123     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3124         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3125
3126     surface->palette = palette;
3127
3128     if (palette)
3129     {
3130         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3131             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3132
3133         surface->surface_ops->surface_realize_palette(surface);
3134     }
3135 }
3136
3137 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3138         DWORD flags, const struct wined3d_color_key *color_key)
3139 {
3140     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3141
3142     if (flags & WINEDDCKEY_COLORSPACE)
3143     {
3144         FIXME(" colorkey value not supported (%08x) !\n", flags);
3145         return WINED3DERR_INVALIDCALL;
3146     }
3147
3148     /* Dirtify the surface, but only if a key was changed. */
3149     if (color_key)
3150     {
3151         switch (flags & ~WINEDDCKEY_COLORSPACE)
3152         {
3153             case WINEDDCKEY_DESTBLT:
3154                 surface->dst_blt_color_key = *color_key;
3155                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3156                 break;
3157
3158             case WINEDDCKEY_DESTOVERLAY:
3159                 surface->dst_overlay_color_key = *color_key;
3160                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3161                 break;
3162
3163             case WINEDDCKEY_SRCOVERLAY:
3164                 surface->src_overlay_color_key = *color_key;
3165                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3166                 break;
3167
3168             case WINEDDCKEY_SRCBLT:
3169                 surface->src_blt_color_key = *color_key;
3170                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3171                 break;
3172         }
3173     }
3174     else
3175     {
3176         switch (flags & ~WINEDDCKEY_COLORSPACE)
3177         {
3178             case WINEDDCKEY_DESTBLT:
3179                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3180                 break;
3181
3182             case WINEDDCKEY_DESTOVERLAY:
3183                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3184                 break;
3185
3186             case WINEDDCKEY_SRCOVERLAY:
3187                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3188                 break;
3189
3190             case WINEDDCKEY_SRCBLT:
3191                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3192                 break;
3193         }
3194     }
3195
3196     return WINED3D_OK;
3197 }
3198
3199 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3200 {
3201     TRACE("surface %p.\n", surface);
3202
3203     return surface->palette;
3204 }
3205
3206 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3207 {
3208     const struct wined3d_format *format = surface->resource.format;
3209     DWORD pitch;
3210
3211     TRACE("surface %p.\n", surface);
3212
3213     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3214     {
3215         /* Since compressed formats are block based, pitch means the amount of
3216          * bytes to the next row of block rather than the next row of pixels. */
3217         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3218         pitch = row_block_count * format->block_byte_count;
3219     }
3220     else
3221     {
3222         unsigned char alignment = surface->resource.device->surface_alignment;
3223         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3224         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3225     }
3226
3227     TRACE("Returning %u.\n", pitch);
3228
3229     return pitch;
3230 }
3231
3232 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3233 {
3234     TRACE("surface %p, mem %p.\n", surface, mem);
3235
3236     if (surface->resource.map_count || (surface->flags & SFLAG_DCINUSE))
3237     {
3238         WARN("Surface is mapped or the DC is in use.\n");
3239         return WINED3DERR_INVALIDCALL;
3240     }
3241
3242     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3243     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3244     {
3245         ERR("Not supported on render targets.\n");
3246         return WINED3DERR_INVALIDCALL;
3247     }
3248
3249     if (mem && mem != surface->resource.allocatedMemory)
3250     {
3251         void *release = NULL;
3252
3253         /* Do I have to copy the old surface content? */
3254         if (surface->flags & SFLAG_DIBSECTION)
3255         {
3256             DeleteDC(surface->hDC);
3257             DeleteObject(surface->dib.DIBsection);
3258             surface->dib.bitmap_data = NULL;
3259             surface->resource.allocatedMemory = NULL;
3260             surface->hDC = NULL;
3261             surface->flags &= ~SFLAG_DIBSECTION;
3262         }
3263         else if (!(surface->flags & SFLAG_USERPTR))
3264         {
3265             release = surface->resource.heapMemory;
3266             surface->resource.heapMemory = NULL;
3267         }
3268         surface->resource.allocatedMemory = mem;
3269         surface->flags |= SFLAG_USERPTR;
3270
3271         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3272         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3273
3274         /* For client textures OpenGL has to be notified. */
3275         if (surface->flags & SFLAG_CLIENT)
3276             surface_release_client_storage(surface);
3277
3278         /* Now free the old memory if any. */
3279         HeapFree(GetProcessHeap(), 0, release);
3280     }
3281     else if (surface->flags & SFLAG_USERPTR)
3282     {
3283         /* HeapMemory should be NULL already. */
3284         if (surface->resource.heapMemory)
3285             ERR("User pointer surface has heap memory allocated.\n");
3286
3287         if (!mem)
3288         {
3289             surface->resource.allocatedMemory = NULL;
3290             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3291
3292             if (surface->flags & SFLAG_CLIENT)
3293                 surface_release_client_storage(surface);
3294
3295             surface_prepare_system_memory(surface);
3296         }
3297
3298         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3299     }
3300
3301     return WINED3D_OK;
3302 }
3303
3304 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3305 {
3306     LONG w, h;
3307
3308     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3309
3310     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3311     {
3312         WARN("Not an overlay surface.\n");
3313         return WINEDDERR_NOTAOVERLAYSURFACE;
3314     }
3315
3316     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3317     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3318     surface->overlay_destrect.left = x;
3319     surface->overlay_destrect.top = y;
3320     surface->overlay_destrect.right = x + w;
3321     surface->overlay_destrect.bottom = y + h;
3322
3323     surface_draw_overlay(surface);
3324
3325     return WINED3D_OK;
3326 }
3327
3328 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3329 {
3330     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3331
3332     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3333     {
3334         TRACE("Not an overlay surface.\n");
3335         return WINEDDERR_NOTAOVERLAYSURFACE;
3336     }
3337
3338     if (!surface->overlay_dest)
3339     {
3340         TRACE("Overlay not visible.\n");
3341         *x = 0;
3342         *y = 0;
3343         return WINEDDERR_OVERLAYNOTVISIBLE;
3344     }
3345
3346     *x = surface->overlay_destrect.left;
3347     *y = surface->overlay_destrect.top;
3348
3349     TRACE("Returning position %d, %d.\n", *x, *y);
3350
3351     return WINED3D_OK;
3352 }
3353
3354 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3355         DWORD flags, struct wined3d_surface *ref)
3356 {
3357     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3358
3359     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3360     {
3361         TRACE("Not an overlay surface.\n");
3362         return WINEDDERR_NOTAOVERLAYSURFACE;
3363     }
3364
3365     return WINED3D_OK;
3366 }
3367
3368 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3369         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3370 {
3371     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3372             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3373
3374     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3375     {
3376         WARN("Not an overlay surface.\n");
3377         return WINEDDERR_NOTAOVERLAYSURFACE;
3378     }
3379     else if (!dst_surface)
3380     {
3381         WARN("Dest surface is NULL.\n");
3382         return WINED3DERR_INVALIDCALL;
3383     }
3384
3385     if (src_rect)
3386     {
3387         surface->overlay_srcrect = *src_rect;
3388     }
3389     else
3390     {
3391         surface->overlay_srcrect.left = 0;
3392         surface->overlay_srcrect.top = 0;
3393         surface->overlay_srcrect.right = surface->resource.width;
3394         surface->overlay_srcrect.bottom = surface->resource.height;
3395     }
3396
3397     if (dst_rect)
3398     {
3399         surface->overlay_destrect = *dst_rect;
3400     }
3401     else
3402     {
3403         surface->overlay_destrect.left = 0;
3404         surface->overlay_destrect.top = 0;
3405         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3406         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3407     }
3408
3409     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3410     {
3411         surface->overlay_dest = NULL;
3412         list_remove(&surface->overlay_entry);
3413     }
3414
3415     if (flags & WINEDDOVER_SHOW)
3416     {
3417         if (surface->overlay_dest != dst_surface)
3418         {
3419             surface->overlay_dest = dst_surface;
3420             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3421         }
3422     }
3423     else if (flags & WINEDDOVER_HIDE)
3424     {
3425         /* tests show that the rectangles are erased on hide */
3426         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3427         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3428         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3429         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3430         surface->overlay_dest = NULL;
3431     }
3432
3433     surface_draw_overlay(surface);
3434
3435     return WINED3D_OK;
3436 }
3437
3438 HRESULT CDECL wined3d_surface_update_desc(struct wined3d_surface *surface,
3439         UINT width, UINT height, enum wined3d_format_id format_id,
3440         enum wined3d_multisample_type multisample_type, UINT multisample_quality)
3441 {
3442     struct wined3d_device *device = surface->resource.device;
3443     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
3444     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
3445     UINT resource_size = wined3d_format_calculate_size(format, device->surface_alignment, width, height);
3446
3447     TRACE("surface %p, width %u, height %u, format %s, multisample_type %#x, multisample_quality %u.\n",
3448             surface, width, height, debug_d3dformat(format_id), multisample_type, multisample_type);
3449
3450     if (!resource_size)
3451         return WINED3DERR_INVALIDCALL;
3452
3453     if (device->d3d_initialized)
3454         surface->resource.resource_ops->resource_unload(&surface->resource);
3455
3456     if (surface->flags & SFLAG_DIBSECTION)
3457     {
3458         DeleteDC(surface->hDC);
3459         DeleteObject(surface->dib.DIBsection);
3460         surface->dib.bitmap_data = NULL;
3461         surface->flags &= ~SFLAG_DIBSECTION;
3462     }
3463
3464     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_USERPTR);
3465     surface->resource.allocatedMemory = NULL;
3466     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3467     surface->resource.heapMemory = NULL;
3468
3469     surface->resource.width = width;
3470     surface->resource.height = height;
3471     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[ARB_TEXTURE_RECTANGLE]
3472             || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
3473     {
3474         surface->pow2Width = width;
3475         surface->pow2Height = height;
3476     }
3477     else
3478     {
3479         surface->pow2Width = surface->pow2Height = 1;
3480         while (surface->pow2Width < width)
3481             surface->pow2Width <<= 1;
3482         while (surface->pow2Height < height)
3483             surface->pow2Height <<= 1;
3484     }
3485
3486     if (surface->pow2Width != width || surface->pow2Height != height)
3487         surface->flags |= SFLAG_NONPOW2;
3488     else
3489         surface->flags &= ~SFLAG_NONPOW2;
3490
3491     surface->resource.format = format;
3492     surface->resource.multisample_type = multisample_type;
3493     surface->resource.multisample_quality = multisample_quality;
3494     surface->resource.size = resource_size;
3495
3496     if (!surface_init_sysmem(surface))
3497         return E_OUTOFMEMORY;
3498
3499     return WINED3D_OK;
3500 }
3501
3502 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3503         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3504 {
3505     unsigned short *dst_s;
3506     const float *src_f;
3507     unsigned int x, y;
3508
3509     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3510
3511     for (y = 0; y < h; ++y)
3512     {
3513         src_f = (const float *)(src + y * pitch_in);
3514         dst_s = (unsigned short *) (dst + y * pitch_out);
3515         for (x = 0; x < w; ++x)
3516         {
3517             dst_s[x] = float_32_to_16(src_f + x);
3518         }
3519     }
3520 }
3521
3522 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3523         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3524 {
3525     static const unsigned char convert_5to8[] =
3526     {
3527         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3528         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3529         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3530         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3531     };
3532     static const unsigned char convert_6to8[] =
3533     {
3534         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3535         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3536         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3537         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3538         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3539         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3540         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3541         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3542     };
3543     unsigned int x, y;
3544
3545     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3546
3547     for (y = 0; y < h; ++y)
3548     {
3549         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3550         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3551         for (x = 0; x < w; ++x)
3552         {
3553             WORD pixel = src_line[x];
3554             dst_line[x] = 0xff000000
3555                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3556                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3557                     | convert_5to8[(pixel & 0x001f)];
3558         }
3559     }
3560 }
3561
3562 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3563  * in both cases we're just setting the X / Alpha channel to 0xff. */
3564 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3565         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3566 {
3567     unsigned int x, y;
3568
3569     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3570
3571     for (y = 0; y < h; ++y)
3572     {
3573         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3574         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3575
3576         for (x = 0; x < w; ++x)
3577         {
3578             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3579         }
3580     }
3581 }
3582
3583 static inline BYTE cliptobyte(int x)
3584 {
3585     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3586 }
3587
3588 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3589         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3590 {
3591     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3592     unsigned int x, y;
3593
3594     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3595
3596     for (y = 0; y < h; ++y)
3597     {
3598         const BYTE *src_line = src + y * pitch_in;
3599         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3600         for (x = 0; x < w; ++x)
3601         {
3602             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3603              *     C = Y - 16; D = U - 128; E = V - 128;
3604              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3605              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3606              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3607              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3608              * U and V are shared between the pixels. */
3609             if (!(x & 1)) /* For every even pixel, read new U and V. */
3610             {
3611                 d = (int) src_line[1] - 128;
3612                 e = (int) src_line[3] - 128;
3613                 r2 = 409 * e + 128;
3614                 g2 = - 100 * d - 208 * e + 128;
3615                 b2 = 516 * d + 128;
3616             }
3617             c2 = 298 * ((int) src_line[0] - 16);
3618             dst_line[x] = 0xff000000
3619                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3620                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3621                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3622                 /* Scale RGB values to 0..255 range,
3623                  * then clip them if still not in range (may be negative),
3624                  * then shift them within DWORD if necessary. */
3625             src_line += 2;
3626         }
3627     }
3628 }
3629
3630 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3631         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3632 {
3633     unsigned int x, y;
3634     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3635
3636     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3637
3638     for (y = 0; y < h; ++y)
3639     {
3640         const BYTE *src_line = src + y * pitch_in;
3641         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3642         for (x = 0; x < w; ++x)
3643         {
3644             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3645              *     C = Y - 16; D = U - 128; E = V - 128;
3646              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3647              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3648              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3649              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3650              * U and V are shared between the pixels. */
3651             if (!(x & 1)) /* For every even pixel, read new U and V. */
3652             {
3653                 d = (int) src_line[1] - 128;
3654                 e = (int) src_line[3] - 128;
3655                 r2 = 409 * e + 128;
3656                 g2 = - 100 * d - 208 * e + 128;
3657                 b2 = 516 * d + 128;
3658             }
3659             c2 = 298 * ((int) src_line[0] - 16);
3660             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3661                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3662                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3663                 /* Scale RGB values to 0..255 range,
3664                  * then clip them if still not in range (may be negative),
3665                  * then shift them within DWORD if necessary. */
3666             src_line += 2;
3667         }
3668     }
3669 }
3670
3671 struct d3dfmt_converter_desc
3672 {
3673     enum wined3d_format_id from, to;
3674     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3675 };
3676
/* Table of the available CPU format conversions, searched linearly by
 * find_converter(). Each row is {source format, destination format,
 * conversion routine}. */
static const struct d3dfmt_converter_desc converters[] =
{
    {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
    {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
    /* The next two rows share one routine: both directions only force the
     * top byte to 0xff. */
    {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
};
3686
3687 static inline const struct d3dfmt_converter_desc *find_converter(enum wined3d_format_id from,
3688         enum wined3d_format_id to)
3689 {
3690     unsigned int i;
3691
3692     for (i = 0; i < (sizeof(converters) / sizeof(*converters)); ++i)
3693     {
3694         if (converters[i].from == from && converters[i].to == to)
3695             return &converters[i];
3696     }
3697
3698     return NULL;
3699 }
3700
3701 /*****************************************************************************
3702  * surface_convert_format
3703  *
3704  * Creates a duplicate of a surface in a different format. Is used by Blt to
3705  * blit between surfaces with different formats.
3706  *
3707  * Parameters
3708  *  source: Source surface
3709  *  fmt: Requested destination format
3710  *
3711  *****************************************************************************/
3712 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3713 {
3714     struct wined3d_map_desc src_map, dst_map;
3715     const struct d3dfmt_converter_desc *conv;
3716     struct wined3d_surface *ret = NULL;
3717     HRESULT hr;
3718
3719     conv = find_converter(source->resource.format->id, to_fmt);
3720     if (!conv)
3721     {
3722         FIXME("Cannot find a conversion function from format %s to %s.\n",
3723                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3724         return NULL;
3725     }
3726
3727     /* FIXME: Multisampled conversion? */
3728     if (FAILED(hr = wined3d_surface_create(source->resource.device, source->resource.width, source->resource.height,
3729             to_fmt, 0, WINED3D_POOL_SCRATCH, WINED3D_MULTISAMPLE_NONE, 0,
3730             WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD, NULL, &wined3d_null_parent_ops, &ret)))
3731     {
3732         ERR("Failed to create a destination surface for conversion.\n");
3733         return NULL;
3734     }
3735
3736     memset(&src_map, 0, sizeof(src_map));
3737     memset(&dst_map, 0, sizeof(dst_map));
3738
3739     if (FAILED(hr = wined3d_surface_map(source, &src_map, NULL, WINED3D_MAP_READONLY)))
3740     {
3741         ERR("Failed to lock the source surface.\n");
3742         wined3d_surface_decref(ret);
3743         return NULL;
3744     }
3745     if (FAILED(hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3D_MAP_READONLY)))
3746     {
3747         ERR("Failed to lock the destination surface.\n");
3748         wined3d_surface_unmap(source);
3749         wined3d_surface_decref(ret);
3750         return NULL;
3751     }
3752
3753     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3754             source->resource.width, source->resource.height);
3755
3756     wined3d_surface_unmap(ret);
3757     wined3d_surface_unmap(source);
3758
3759     return ret;
3760 }
3761
3762 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3763         unsigned int bpp, UINT pitch, DWORD color)
3764 {
3765     BYTE *first;
3766     int x, y;
3767
3768     /* Do first row */
3769
3770 #define COLORFILL_ROW(type) \
3771 do { \
3772     type *d = (type *)buf; \
3773     for (x = 0; x < width; ++x) \
3774         d[x] = (type)color; \
3775 } while(0)
3776
3777     switch (bpp)
3778     {
3779         case 1:
3780             COLORFILL_ROW(BYTE);
3781             break;
3782
3783         case 2:
3784             COLORFILL_ROW(WORD);
3785             break;
3786
3787         case 3:
3788         {
3789             BYTE *d = buf;
3790             for (x = 0; x < width; ++x, d += 3)
3791             {
3792                 d[0] = (color      ) & 0xff;
3793                 d[1] = (color >>  8) & 0xff;
3794                 d[2] = (color >> 16) & 0xff;
3795             }
3796             break;
3797         }
3798         case 4:
3799             COLORFILL_ROW(DWORD);
3800             break;
3801
3802         default:
3803             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3804             return WINED3DERR_NOTAVAILABLE;
3805     }
3806
3807 #undef COLORFILL_ROW
3808
3809     /* Now copy first row. */
3810     first = buf;
3811     for (y = 1; y < height; ++y)
3812     {
3813         buf += pitch;
3814         memcpy(buf, first, width * bpp);
3815     }
3816
3817     return WINED3D_OK;
3818 }
3819
3820 struct wined3d_surface * CDECL wined3d_surface_from_resource(struct wined3d_resource *resource)
3821 {
3822     return surface_from_resource(resource);
3823 }
3824
3825 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3826 {
3827     TRACE("surface %p.\n", surface);
3828
3829     if (!surface->resource.map_count)
3830     {
3831         WARN("Trying to unmap unmapped surface.\n");
3832         return WINEDDERR_NOTLOCKED;
3833     }
3834     --surface->resource.map_count;
3835
3836     surface->surface_ops->surface_unmap(surface);
3837
3838     return WINED3D_OK;
3839 }
3840
3841 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3842         struct wined3d_map_desc *map_desc, const RECT *rect, DWORD flags)
3843 {
3844     const struct wined3d_format *format = surface->resource.format;
3845
3846     TRACE("surface %p, map_desc %p, rect %s, flags %#x.\n",
3847             surface, map_desc, wine_dbgstr_rect(rect), flags);
3848
3849     if (surface->resource.map_count)
3850     {
3851         WARN("Surface is already mapped.\n");
3852         return WINED3DERR_INVALIDCALL;
3853     }
3854
3855     if ((format->flags & WINED3DFMT_FLAG_BLOCKS) && rect
3856             && !surface_check_block_align(surface, rect))
3857     {
3858         WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3859                 wine_dbgstr_rect(rect), format->block_width, format->block_height);
3860
3861         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
3862             return WINED3DERR_INVALIDCALL;
3863     }
3864
3865     ++surface->resource.map_count;
3866
3867     if (!(surface->flags & SFLAG_LOCKABLE))
3868         WARN("Trying to lock unlockable surface.\n");
3869
3870     /* Performance optimization: Count how often a surface is mapped, if it is
3871      * mapped regularly do not throw away the system memory copy. This avoids
3872      * the need to download the surface from OpenGL all the time. The surface
3873      * is still downloaded if the OpenGL texture is changed. */
3874     if (!(surface->flags & SFLAG_DYNLOCK))
3875     {
3876         if (++surface->lockCount > MAXLOCKCOUNT)
3877         {
3878             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3879             surface->flags |= SFLAG_DYNLOCK;
3880         }
3881     }
3882
3883     surface->surface_ops->surface_map(surface, rect, flags);
3884
3885     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3886         map_desc->row_pitch = surface->resource.width * format->byte_count;
3887     else
3888         map_desc->row_pitch = wined3d_surface_get_pitch(surface);
3889     map_desc->slice_pitch = 0;
3890
3891     if (!rect)
3892     {
3893         map_desc->data = surface->resource.allocatedMemory;
3894         surface->lockedRect.left = 0;
3895         surface->lockedRect.top = 0;
3896         surface->lockedRect.right = surface->resource.width;
3897         surface->lockedRect.bottom = surface->resource.height;
3898     }
3899     else
3900     {
3901         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3902         {
3903             /* Compressed textures are block based, so calculate the offset of
3904              * the block that contains the top-left pixel of the locked rectangle. */
3905             map_desc->data = surface->resource.allocatedMemory
3906                     + ((rect->top / format->block_height) * map_desc->row_pitch)
3907                     + ((rect->left / format->block_width) * format->block_byte_count);
3908         }
3909         else
3910         {
3911             map_desc->data = surface->resource.allocatedMemory
3912                     + (map_desc->row_pitch * rect->top)
3913                     + (rect->left * format->byte_count);
3914         }
3915         surface->lockedRect.left = rect->left;
3916         surface->lockedRect.top = rect->top;
3917         surface->lockedRect.right = rect->right;
3918         surface->lockedRect.bottom = rect->bottom;
3919     }
3920
3921     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3922     TRACE("Returning memory %p, pitch %u.\n", map_desc->data, map_desc->row_pitch);
3923
3924     return WINED3D_OK;
3925 }
3926
3927 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3928 {
3929     struct wined3d_map_desc map;
3930     HRESULT hr;
3931
3932     TRACE("surface %p, dc %p.\n", surface, dc);
3933
3934     if (surface->flags & SFLAG_USERPTR)
3935     {
3936         ERR("Not supported on surfaces with application-provided memory.\n");
3937         return WINEDDERR_NODC;
3938     }
3939
3940     /* Give more detailed info for ddraw. */
3941     if (surface->flags & SFLAG_DCINUSE)
3942         return WINEDDERR_DCALREADYCREATED;
3943
3944     /* Can't GetDC if the surface is locked. */
3945     if (surface->resource.map_count)
3946         return WINED3DERR_INVALIDCALL;
3947
3948     /* Create a DIB section if there isn't a dc yet. */
3949     if (!surface->hDC)
3950     {
3951         if (surface->flags & SFLAG_CLIENT)
3952         {
3953             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3954             surface_release_client_storage(surface);
3955         }
3956         hr = surface_create_dib_section(surface);
3957         if (FAILED(hr))
3958             return WINED3DERR_INVALIDCALL;
3959
3960         /* Use the DIB section from now on if we are not using a PBO. */
3961         if (!(surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)))
3962         {
3963             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
3964             surface->resource.heapMemory = NULL;
3965             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3966         }
3967     }
3968
3969     /* Map the surface. */
3970     hr = wined3d_surface_map(surface, &map, NULL, 0);
3971     if (FAILED(hr))
3972     {
3973         ERR("Map failed, hr %#x.\n", hr);
3974         return hr;
3975     }
3976
3977     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3978      * activates the allocatedMemory. */
3979     if (surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM))
3980         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3981
3982     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3983             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3984     {
3985         /* GetDC on palettized formats is unsupported in D3D9, and the method
3986          * is missing in D3D8, so this should only be used for DX <=7
3987          * surfaces (with non-device palettes). */
3988         const PALETTEENTRY *pal = NULL;
3989
3990         if (surface->palette)
3991         {
3992             pal = surface->palette->palents;
3993         }
3994         else
3995         {
3996             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3997             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3998
3999             if (dds_primary && dds_primary->palette)
4000                 pal = dds_primary->palette->palents;
4001         }
4002
4003         if (pal)
4004         {
4005             RGBQUAD col[256];
4006             unsigned int i;
4007
4008             for (i = 0; i < 256; ++i)
4009             {
4010                 col[i].rgbRed = pal[i].peRed;
4011                 col[i].rgbGreen = pal[i].peGreen;
4012                 col[i].rgbBlue = pal[i].peBlue;
4013                 col[i].rgbReserved = 0;
4014             }
4015             SetDIBColorTable(surface->hDC, 0, 256, col);
4016         }
4017     }
4018
4019     surface->flags |= SFLAG_DCINUSE;
4020
4021     *dc = surface->hDC;
4022     TRACE("Returning dc %p.\n", *dc);
4023
4024     return WINED3D_OK;
4025 }
4026
4027 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
4028 {
4029     TRACE("surface %p, dc %p.\n", surface, dc);
4030
4031     if (!(surface->flags & SFLAG_DCINUSE))
4032         return WINEDDERR_NODC;
4033
4034     if (surface->hDC != dc)
4035     {
4036         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
4037                 dc, surface->hDC);
4038         return WINEDDERR_NODC;
4039     }
4040
4041     /* Copy the contents of the DIB over to the PBO. */
4042     if ((surface->flags & (SFLAG_PBO | SFLAG_PIN_SYSMEM)) && surface->resource.allocatedMemory)
4043         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
4044
4045     /* We locked first, so unlock now. */
4046     wined3d_surface_unmap(surface);
4047
4048     surface->flags &= ~SFLAG_DCINUSE;
4049
4050     return WINED3D_OK;
4051 }
4052
4053 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
4054 {
4055     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
4056
4057     if (flags)
4058     {
4059         static UINT once;
4060         if (!once++)
4061             FIXME("Ignoring flags %#x.\n", flags);
4062         else
4063             WARN("Ignoring flags %#x.\n", flags);
4064     }
4065
4066     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4067     {
4068         ERR("Not supported on swapchain surfaces.\n");
4069         return WINEDDERR_NOTFLIPPABLE;
4070     }
4071
4072     /* Flipping is only supported on render targets and overlays. */
4073     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
4074     {
4075         WARN("Tried to flip a non-render target, non-overlay surface.\n");
4076         return WINEDDERR_NOTFLIPPABLE;
4077     }
4078
4079     flip_surface(surface, override);
4080
4081     /* Update overlays if they're visible. */
4082     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
4083         return surface_draw_overlay(surface);
4084
4085     return WINED3D_OK;
4086 }
4087
4088 /* Do not call while under the GL lock. */
4089 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
4090 {
4091     struct wined3d_device *device = surface->resource.device;
4092
4093     TRACE("iface %p, srgb %#x.\n", surface, srgb);
4094
4095     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4096     {
4097         struct wined3d_texture *texture = surface->container.u.texture;
4098
4099         TRACE("Passing to container (%p).\n", texture);
4100         texture->texture_ops->texture_preload(texture, srgb);
4101     }
4102     else
4103     {
4104         struct wined3d_context *context;
4105
4106         TRACE("(%p) : About to load surface\n", surface);
4107
4108         /* TODO: Use already acquired context when possible. */
4109         context = context_acquire(device, NULL);
4110
4111         surface_load(surface, srgb == SRGB_SRGB);
4112
4113         if (surface->resource.pool == WINED3D_POOL_DEFAULT)
4114         {
4115             /* Tell opengl to try and keep this texture in video ram (well mostly) */
4116             GLclampf tmp;
4117             tmp = 0.9f;
4118             context->gl_info->gl_ops.gl.p_glPrioritizeTextures(1, &surface->texture_name, &tmp);
4119         }
4120
4121         context_release(context);
4122     }
4123 }
4124
4125 /* Read the framebuffer back into the surface */
4126 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4127 {
4128     struct wined3d_device *device = surface->resource.device;
4129     const struct wined3d_gl_info *gl_info;
4130     struct wined3d_context *context;
4131     BYTE *mem;
4132     GLint fmt;
4133     GLint type;
4134     BYTE *row, *top, *bottom;
4135     int i;
4136     BOOL bpp;
4137     RECT local_rect;
4138     BOOL srcIsUpsideDown;
4139     GLint rowLen = 0;
4140     GLint skipPix = 0;
4141     GLint skipRow = 0;
4142
4143     context = context_acquire(device, surface);
4144     context_apply_blit_state(context, device);
4145     gl_info = context->gl_info;
4146
4147     /* Select the correct read buffer, and give some debug output.
4148      * There is no need to keep track of the current read buffer or reset it, every part of the code
4149      * that reads sets the read buffer as desired.
4150      */
4151     if (surface_is_offscreen(surface))
4152     {
4153         /* Mapping the primary render target which is not on a swapchain.
4154          * Read from the back buffer. */
4155         TRACE("Mapping offscreen render target.\n");
4156         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
4157         srcIsUpsideDown = TRUE;
4158     }
4159     else
4160     {
4161         /* Onscreen surfaces are always part of a swapchain */
4162         GLenum buffer = surface_get_gl_buffer(surface);
4163         TRACE("Mapping %#x buffer.\n", buffer);
4164         gl_info->gl_ops.gl.p_glReadBuffer(buffer);
4165         checkGLcall("glReadBuffer");
4166         srcIsUpsideDown = FALSE;
4167     }
4168
4169     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4170     if (!rect)
4171     {
4172         local_rect.left = 0;
4173         local_rect.top = 0;
4174         local_rect.right = surface->resource.width;
4175         local_rect.bottom = surface->resource.height;
4176     }
4177     else
4178     {
4179         local_rect = *rect;
4180     }
4181     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4182
4183     switch (surface->resource.format->id)
4184     {
4185         case WINED3DFMT_P8_UINT:
4186         {
4187             if (primary_render_target_is_p8(device))
4188             {
4189                 /* In case of P8 render targets the index is stored in the alpha component */
4190                 fmt = GL_ALPHA;
4191                 type = GL_UNSIGNED_BYTE;
4192                 mem = dest;
4193                 bpp = surface->resource.format->byte_count;
4194             }
4195             else
4196             {
4197                 /* GL can't return palettized data, so read ARGB pixels into a
4198                  * separate block of memory and convert them into palettized format
4199                  * in software. Slow, but if the app means to use palettized render
4200                  * targets and locks it...
4201                  *
4202                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4203                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4204                  * for the color channels when palettizing the colors.
4205                  */
4206                 fmt = GL_RGB;
4207                 type = GL_UNSIGNED_BYTE;
4208                 pitch *= 3;
4209                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4210                 if (!mem)
4211                 {
4212                     ERR("Out of memory\n");
4213                     return;
4214                 }
4215                 bpp = surface->resource.format->byte_count * 3;
4216             }
4217         }
4218         break;
4219
4220         default:
4221             mem = dest;
4222             fmt = surface->resource.format->glFormat;
4223             type = surface->resource.format->glType;
4224             bpp = surface->resource.format->byte_count;
4225     }
4226
4227     if (surface->flags & SFLAG_PBO)
4228     {
4229         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4230         checkGLcall("glBindBufferARB");
4231         if (mem)
4232         {
4233             ERR("mem not null for pbo -- unexpected\n");
4234             mem = NULL;
4235         }
4236     }
4237
4238     /* Save old pixel store pack state */
4239     gl_info->gl_ops.gl.p_glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4240     checkGLcall("glGetIntegerv");
4241     gl_info->gl_ops.gl.p_glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4242     checkGLcall("glGetIntegerv");
4243     gl_info->gl_ops.gl.p_glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4244     checkGLcall("glGetIntegerv");
4245
4246     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4247     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4248     checkGLcall("glPixelStorei");
4249     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4250     checkGLcall("glPixelStorei");
4251     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4252     checkGLcall("glPixelStorei");
4253
4254     gl_info->gl_ops.gl.p_glReadPixels(local_rect.left,
4255             !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4256             local_rect.right - local_rect.left,
4257             local_rect.bottom - local_rect.top,
4258             fmt, type, mem);
4259     checkGLcall("glReadPixels");
4260
4261     /* Reset previous pixel store pack state */
4262     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4263     checkGLcall("glPixelStorei");
4264     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4265     checkGLcall("glPixelStorei");
4266     gl_info->gl_ops.gl.p_glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4267     checkGLcall("glPixelStorei");
4268
4269     if (surface->flags & SFLAG_PBO)
4270     {
4271         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4272         checkGLcall("glBindBufferARB");
4273
4274         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4275          * to get a pointer to it and perform the flipping in software. This is a lot
4276          * faster than calling glReadPixels for each line. In case we want more speed
4277          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4278         if (!srcIsUpsideDown)
4279         {
4280             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4281             checkGLcall("glBindBufferARB");
4282
4283             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4284             checkGLcall("glMapBufferARB");
4285         }
4286     }
4287
4288     /* TODO: Merge this with the palettization loop below for P8 targets */
4289     if(!srcIsUpsideDown) {
4290         UINT len, off;
4291         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4292             Flip the lines in software */
4293         len = (local_rect.right - local_rect.left) * bpp;
4294         off = local_rect.left * bpp;
4295
4296         row = HeapAlloc(GetProcessHeap(), 0, len);
4297         if(!row) {
4298             ERR("Out of memory\n");
4299             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4300                 HeapFree(GetProcessHeap(), 0, mem);
4301             return;
4302         }
4303
4304         top = mem + pitch * local_rect.top;
4305         bottom = mem + pitch * (local_rect.bottom - 1);
4306         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4307             memcpy(row, top + off, len);
4308             memcpy(top + off, bottom + off, len);
4309             memcpy(bottom + off, row, len);
4310             top += pitch;
4311             bottom -= pitch;
4312         }
4313         HeapFree(GetProcessHeap(), 0, row);
4314
4315         /* Unmap the temp PBO buffer */
4316         if (surface->flags & SFLAG_PBO)
4317         {
4318             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4319             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4320         }
4321     }
4322
4323     context_release(context);
4324
4325     /* For P8 textures we need to perform an inverse palette lookup. This is
4326      * done by searching for a palette index which matches the RGB value.
4327      * Note this isn't guaranteed to work when there are multiple entries for
4328      * the same color but we have no choice. In case of P8 render targets,
4329      * the index is stored in the alpha component so no conversion is needed. */
4330     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4331     {
4332         const PALETTEENTRY *pal = NULL;
4333         DWORD width = pitch / 3;
4334         int x, y, c;
4335
4336         if (surface->palette)
4337         {
4338             pal = surface->palette->palents;
4339         }
4340         else
4341         {
4342             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4343             HeapFree(GetProcessHeap(), 0, mem);
4344             return;
4345         }
4346
4347         for(y = local_rect.top; y < local_rect.bottom; y++) {
4348             for(x = local_rect.left; x < local_rect.right; x++) {
4349                 /*                      start              lines            pixels      */
4350                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4351                 const BYTE *green = blue  + 1;
4352                 const BYTE *red = green + 1;
4353
4354                 for(c = 0; c < 256; c++) {
4355                     if(*red   == pal[c].peRed   &&
4356                        *green == pal[c].peGreen &&
4357                        *blue  == pal[c].peBlue)
4358                     {
4359                         *((BYTE *) dest + y * width + x) = c;
4360                         break;
4361                     }
4362                 }
4363             }
4364         }
4365         HeapFree(GetProcessHeap(), 0, mem);
4366     }
4367 }
4368
4369 /* Read the framebuffer contents into a texture. Note that this function
4370  * doesn't do any kind of flipping. Using this on an onscreen surface will
4371  * result in a flipped D3D texture. */
4372 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4373 {
4374     struct wined3d_device *device = surface->resource.device;
4375     const struct wined3d_gl_info *gl_info;
4376     struct wined3d_context *context;
4377
4378     context = context_acquire(device, surface);
4379     gl_info = context->gl_info;
4380     device_invalidate_state(device, STATE_FRAMEBUFFER);
4381
4382     surface_prepare_texture(surface, context, srgb);
4383     surface_bind_and_dirtify(surface, context, srgb);
4384
4385     TRACE("Reading back offscreen render target %p.\n", surface);
4386
4387     if (surface_is_offscreen(surface))
4388         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
4389     else
4390         gl_info->gl_ops.gl.p_glReadBuffer(surface_get_gl_buffer(surface));
4391     checkGLcall("glReadBuffer");
4392
4393     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4394             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4395     checkGLcall("glCopyTexSubImage2D");
4396
4397     context_release(context);
4398 }
4399
4400 /* Context activation is done by the caller. */
4401 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4402         struct wined3d_context *context, BOOL srgb)
4403 {
4404     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4405     enum wined3d_conversion_type convert;
4406     struct wined3d_format format;
4407
4408     if (surface->flags & alloc_flag) return;
4409
4410     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4411     if (convert != WINED3D_CT_NONE || format.convert)
4412         surface->flags |= SFLAG_CONVERTED;
4413     else surface->flags &= ~SFLAG_CONVERTED;
4414
4415     surface_bind_and_dirtify(surface, context, srgb);
4416     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4417     surface->flags |= alloc_flag;
4418 }
4419
4420 /* Context activation is done by the caller. */
4421 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4422 {
4423     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4424     {
4425         struct wined3d_texture *texture = surface->container.u.texture;
4426         UINT sub_count = texture->level_count * texture->layer_count;
4427         UINT i;
4428
4429         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4430
4431         for (i = 0; i < sub_count; ++i)
4432         {
4433             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4434             surface_prepare_texture_internal(s, context, srgb);
4435         }
4436
4437         return;
4438     }
4439
4440     surface_prepare_texture_internal(surface, context, srgb);
4441 }
4442
4443 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4444 {
4445     if (multisample)
4446     {
4447         if (surface->rb_multisample)
4448             return;
4449
4450         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4451         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4452         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4453                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4454         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4455     }
4456     else
4457     {
4458         if (surface->rb_resolved)
4459             return;
4460
4461         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4462         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4463         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4464                 surface->pow2Width, surface->pow2Height);
4465         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4466     }
4467 }
4468
4469 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4470         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4471 {
4472     struct wined3d_device *device = surface->resource.device;
4473     UINT pitch = wined3d_surface_get_pitch(surface);
4474     const struct wined3d_gl_info *gl_info;
4475     struct wined3d_context *context;
4476     RECT local_rect;
4477     UINT w, h;
4478
4479     surface_get_rect(surface, rect, &local_rect);
4480
4481     mem += local_rect.top * pitch + local_rect.left * bpp;
4482     w = local_rect.right - local_rect.left;
4483     h = local_rect.bottom - local_rect.top;
4484
4485     /* Activate the correct context for the render target */
4486     context = context_acquire(device, surface);
4487     context_apply_blit_state(context, device);
4488     gl_info = context->gl_info;
4489
4490     if (!surface_is_offscreen(surface))
4491     {
4492         GLenum buffer = surface_get_gl_buffer(surface);
4493         TRACE("Unlocking %#x buffer.\n", buffer);
4494         context_set_draw_buffer(context, buffer);
4495
4496         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4497         gl_info->gl_ops.gl.p_glPixelZoom(1.0f, -1.0f);
4498     }
4499     else
4500     {
4501         /* Primary offscreen render target */
4502         TRACE("Offscreen render target.\n");
4503         context_set_draw_buffer(context, device->offscreenBuffer);
4504
4505         gl_info->gl_ops.gl.p_glPixelZoom(1.0f, 1.0f);
4506     }
4507
4508     gl_info->gl_ops.gl.p_glRasterPos3i(local_rect.left, local_rect.top, 1);
4509     checkGLcall("glRasterPos3i");
4510
4511     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4512     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4513
4514     if (surface->flags & SFLAG_PBO)
4515     {
4516         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4517         checkGLcall("glBindBufferARB");
4518     }
4519
4520     gl_info->gl_ops.gl.p_glDrawPixels(w, h, fmt, type, mem);
4521     checkGLcall("glDrawPixels");
4522
4523     if (surface->flags & SFLAG_PBO)
4524     {
4525         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4526         checkGLcall("glBindBufferARB");
4527     }
4528
4529     gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4530     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4531
4532     if (wined3d_settings.strict_draw_ordering
4533             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4534             && surface->container.u.swapchain->front_buffer == surface))
4535         gl_info->gl_ops.gl.p_glFlush();
4536
4537     context_release(context);
4538 }
4539
4540 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4541 {
4542     /* FIXME: Is this really how color keys are supposed to work? I think it
4543      * makes more sense to compare the individual channels. */
4544     return color >= color_key->color_space_low_value
4545             && color <= color_key->color_space_high_value;
4546 }
4547
4548 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4549 {
4550     const struct wined3d_device *device = surface->resource.device;
4551     const struct wined3d_palette *pal = surface->palette;
4552     BOOL index_in_alpha = FALSE;
4553     unsigned int i;
4554
4555     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4556      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4557      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4558      * duplicate entries. Store the color key in the unused alpha component to speed the
4559      * download up and to make conversion unneeded. */
4560     index_in_alpha = primary_render_target_is_p8(device);
4561
4562     if (!pal)
4563     {
4564         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4565         if (index_in_alpha)
4566         {
4567             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4568              * there's no palette at this time. */
4569             for (i = 0; i < 256; i++) table[i][3] = i;
4570         }
4571     }
4572     else
4573     {
4574         TRACE("Using surface palette %p\n", pal);
4575         /* Get the surface's palette */
4576         for (i = 0; i < 256; ++i)
4577         {
4578             table[i][0] = pal->palents[i].peRed;
4579             table[i][1] = pal->palents[i].peGreen;
4580             table[i][2] = pal->palents[i].peBlue;
4581
4582             /* When index_in_alpha is set the palette index is stored in the
4583              * alpha component. In case of a readback we can then read
4584              * GL_ALPHA. Color keying is handled in BltOverride using a
4585              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4586              * color key itself is passed to glAlphaFunc in other cases the
4587              * alpha component of pixels that should be masked away is set to 0. */
4588             if (index_in_alpha)
4589                 table[i][3] = i;
4590             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4591                 table[i][3] = 0x00;
4592             else if (pal->flags & WINEDDPCAPS_ALPHA)
4593                 table[i][3] = pal->palents[i].peFlags;
4594             else
4595                 table[i][3] = 0xff;
4596         }
4597     }
4598 }
4599
4600 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width, UINT height,
4601         UINT outpitch, enum wined3d_conversion_type conversion_type, struct wined3d_surface *surface)
4602 {
4603     const BYTE *source;
4604     BYTE *dest;
4605
4606     TRACE("src %p, dst %p, pitch %u, width %u, height %u, outpitch %u, conversion_type %#x, surface %p.\n",
4607             src, dst, pitch, width, height, outpitch, conversion_type, surface);
4608
4609     switch (conversion_type)
4610     {
4611         case WINED3D_CT_NONE:
4612         {
4613             memcpy(dst, src, pitch * height);
4614             break;
4615         }
4616
4617         case WINED3D_CT_PALETTED:
4618         case WINED3D_CT_PALETTED_CK:
4619         {
4620             BYTE table[256][4];
4621             unsigned int x, y;
4622
4623             d3dfmt_p8_init_palette(surface, table, (conversion_type == WINED3D_CT_PALETTED_CK));
4624
4625             for (y = 0; y < height; y++)
4626             {
4627                 source = src + pitch * y;
4628                 dest = dst + outpitch * y;
4629                 /* This is an 1 bpp format, using the width here is fine */
4630                 for (x = 0; x < width; x++) {
4631                     BYTE color = *source++;
4632                     *dest++ = table[color][0];
4633                     *dest++ = table[color][1];
4634                     *dest++ = table[color][2];
4635                     *dest++ = table[color][3];
4636                 }
4637             }
4638         }
4639         break;
4640
4641         case WINED3D_CT_CK_565:
4642         {
4643             /* Converting the 565 format in 5551 packed to emulate color-keying.
4644
4645               Note : in all these conversion, it would be best to average the averaging
4646                       pixels to get the color of the pixel that will be color-keyed to
4647                       prevent 'color bleeding'. This will be done later on if ever it is
4648                       too visible.
4649
4650               Note2: Nvidia documents say that their driver does not support alpha + color keying
4651                      on the same surface and disables color keying in such a case
4652             */
4653             unsigned int x, y;
4654             const WORD *Source;
4655             WORD *Dest;
4656
4657             TRACE("Color keyed 565\n");
4658
4659             for (y = 0; y < height; y++) {
4660                 Source = (const WORD *)(src + y * pitch);
4661                 Dest = (WORD *) (dst + y * outpitch);
4662                 for (x = 0; x < width; x++ ) {
4663                     WORD color = *Source++;
4664                     *Dest = ((color & 0xffc0) | ((color & 0x1f) << 1));
4665                     if (!color_in_range(&surface->src_blt_color_key, color))
4666                         *Dest |= 0x0001;
4667                     Dest++;
4668                 }
4669             }
4670         }
4671         break;
4672
4673         case WINED3D_CT_CK_5551:
4674         {
4675             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4676             unsigned int x, y;
4677             const WORD *Source;
4678             WORD *Dest;
4679             TRACE("Color keyed 5551\n");
4680             for (y = 0; y < height; y++) {
4681                 Source = (const WORD *)(src + y * pitch);
4682                 Dest = (WORD *) (dst + y * outpitch);
4683                 for (x = 0; x < width; x++ ) {
4684                     WORD color = *Source++;
4685                     *Dest = color;
4686                     if (!color_in_range(&surface->src_blt_color_key, color))
4687                         *Dest |= (1 << 15);
4688                     else
4689                         *Dest &= ~(1 << 15);
4690                     Dest++;
4691                 }
4692             }
4693         }
4694         break;
4695
4696         case WINED3D_CT_CK_RGB24:
4697         {
4698             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4699             unsigned int x, y;
4700             for (y = 0; y < height; y++)
4701             {
4702                 source = src + pitch * y;
4703                 dest = dst + outpitch * y;
4704                 for (x = 0; x < width; x++) {
4705                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4706                     DWORD dstcolor = color << 8;
4707                     if (!color_in_range(&surface->src_blt_color_key, color))
4708                         dstcolor |= 0xff;
4709                     *(DWORD*)dest = dstcolor;
4710                     source += 3;
4711                     dest += 4;
4712                 }
4713             }
4714         }
4715         break;
4716
4717         case WINED3D_CT_RGB32_888:
4718         {
4719             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4720             unsigned int x, y;
4721             for (y = 0; y < height; y++)
4722             {
4723                 source = src + pitch * y;
4724                 dest = dst + outpitch * y;
4725                 for (x = 0; x < width; x++) {
4726                     DWORD color = 0xffffff & *(const DWORD*)source;
4727                     DWORD dstcolor = color << 8;
4728                     if (!color_in_range(&surface->src_blt_color_key, color))
4729                         dstcolor |= 0xff;
4730                     *(DWORD*)dest = dstcolor;
4731                     source += 4;
4732                     dest += 4;
4733                 }
4734             }
4735         }
4736         break;
4737
4738         case WINED3D_CT_CK_ARGB32:
4739         {
4740             unsigned int x, y;
4741             for (y = 0; y < height; ++y)
4742             {
4743                 source = src + pitch * y;
4744                 dest = dst + outpitch * y;
4745                 for (x = 0; x < width; ++x)
4746                 {
4747                     DWORD color = *(const DWORD *)source;
4748                     if (color_in_range(&surface->src_blt_color_key, color))
4749                         color &= ~0xff000000;
4750                     *(DWORD*)dest = color;
4751                     source += 4;
4752                     dest += 4;
4753                 }
4754             }
4755         }
4756         break;
4757
4758         default:
4759             ERR("Unsupported conversion type %#x.\n", conversion_type);
4760     }
4761     return WINED3D_OK;
4762 }
4763
4764 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4765 {
4766     /* Flip the surface contents: exchange, member by member, the GDI handles,
          * memory pointers, GL object names and flags of 'front' and 'back'. The
          * two structures themselves stay in place; only the members touched
          * below trade values. */
4767     /* Flip the DC */
4768     {
4769         HDC tmp;
4770         tmp = front->hDC;
4771         front->hDC = back->hDC;
4772         back->hDC = tmp;
4773     }
4774
4775     /* Flip the DIBsection */
4776     {
4777         HBITMAP tmp = front->dib.DIBsection;
4778         front->dib.DIBsection = back->dib.DIBsection;
4779         back->dib.DIBsection = tmp;
4780     }
4781
4782     /* Flip the surface data: the DIB bitmap pointer and both resource memory
          * pointers (allocatedMemory and heapMemory) are swapped as a set. */
4783     {
4784         void* tmp;
4785
4786         tmp = front->dib.bitmap_data;
4787         front->dib.bitmap_data = back->dib.bitmap_data;
4788         back->dib.bitmap_data = tmp;
4789
4790         tmp = front->resource.allocatedMemory;
4791         front->resource.allocatedMemory = back->resource.allocatedMemory;
4792         back->resource.allocatedMemory = tmp;
4793
4794         tmp = front->resource.heapMemory;
4795         front->resource.heapMemory = back->resource.heapMemory;
4796         back->resource.heapMemory = tmp;
4797     }
4798
4799     /* Flip the PBO */
4800     {
4801         GLuint tmp_pbo = front->pbo;
4802         front->pbo = back->pbo;
4803         back->pbo = tmp_pbo;
4804     }
4805
4806     /* Flip the opengl texture: the RGB and sRGB texture names, plus the
          * rb_multisample / rb_resolved members (presumably renderbuffer names -
          * confirm against the structure definition). */
4807     {
4808         GLuint tmp;
4809
4810         tmp = back->texture_name;
4811         back->texture_name = front->texture_name;
4812         front->texture_name = tmp;
4813
4814         tmp = back->texture_name_srgb;
4815         back->texture_name_srgb = front->texture_name_srgb;
4816         front->texture_name_srgb = tmp;
4817
4818         tmp = back->rb_multisample;
4819         back->rb_multisample = front->rb_multisample;
4820         front->rb_multisample = tmp;
4821
4822         tmp = back->rb_resolved;
4823         back->rb_resolved = front->rb_resolved;
4824         front->rb_resolved = tmp;
4825
              /* Both resources are unloaded once the GL names have changed owner,
               * back first. NOTE(review): what resource_unload() releases is not
               * visible from this function - confirm before reordering. */
4826         resource_unload(&back->resource);
4827         resource_unload(&front->resource);
4828     }
4829
          /* Finally exchange the complete flags words of the two surfaces. */
4830     {
4831         DWORD tmp_flags = back->flags;
4832         back->flags = front->flags;
4833         front->flags = tmp_flags;
4834     }
4835 }
4836
4837 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4838  * pixel copy calls.
      *
      * dst_surface: surface whose GL texture receives the pixels.
      * src_surface: surface to read from; the GL context is acquired for it.
      * src_rect:    source rectangle, only read.
      * dst_rect_in: destination rectangle; copied into a local so that top and
      *              bottom can be swapped when top > bottom (which is recorded
      *              in the 'upsidedown' flag).
      * filter:      only inspected to report that filtering other than
      *              WINED3D_TEXF_NONE / WINED3D_TEXF_POINT is not supported
      *              when the copy is stretched.
      *
      * Before returning, dst_surface is passed to surface_modify_location()
      * with SFLAG_INTEXTURE. */
4839 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4840         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4841 {
4842     struct wined3d_device *device = dst_surface->resource.device;
4843     const struct wined3d_gl_info *gl_info;
4844     float xrel, yrel;
4845     UINT row;
4846     struct wined3d_context *context;
4847     BOOL upsidedown = FALSE;
4848     RECT dst_rect = *dst_rect_in;
4849     GLenum dst_target;
4850
          /* The bind target comes from the containing texture when there is one;
           * the glCopyTexSubImage2D calls below still address
           * dst_surface->texture_target. */
4851     if (dst_surface->container.type == WINED3D_CONTAINER_TEXTURE)
4852         dst_target = dst_surface->container.u.texture->target;
4853     else
4854         dst_target = dst_surface->texture_target;
4855
4856     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4857      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4858      */
4859     if(dst_rect.top > dst_rect.bottom) {
4860         UINT tmp = dst_rect.bottom;
4861         dst_rect.bottom = dst_rect.top;
4862         dst_rect.top = tmp;
4863         upsidedown = TRUE;
4864     }
4865
4866     context = context_acquire(device, src_surface);
4867     gl_info = context->gl_info;
4868     context_apply_blit_state(context, device);
4869     surface_internal_preload(dst_surface, SRGB_RGB);
4870
4871     /* Bind the target texture */
4872     context_bind_texture(context, dst_target, dst_surface->texture_name);
          /* Select the buffer to read from. An offscreen source also inverts the
           * upside-down flag. */
4873     if (surface_is_offscreen(src_surface))
4874     {
4875         TRACE("Reading from an offscreen target\n");
4876         upsidedown = !upsidedown;
4877         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
4878     }
4879     else
4880     {
4881         gl_info->gl_ops.gl.p_glReadBuffer(surface_get_gl_buffer(src_surface));
4882     }
4883     checkGLcall("glReadBuffer");
4884
          /* Source extent divided by destination extent per axis; a value within
           * eps of 1.0 is treated as "not stretched" by the tests below. */
4885     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4886     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4887
4888     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4889     {
4890         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4891
4892         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4893             ERR("Texture filtering not supported in direct blit.\n");
4894     }
4895     else if ((filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT)
4896             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4897     {
4898         ERR("Texture filtering not supported in direct blit\n");
4899     }
4900
4901     if (upsidedown
4902             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4903             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4904     {
4905         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do. */
4906         gl_info->gl_ops.gl.p_glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4907                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4908                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4909                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4910     }
4911     else
4912     {
              /* Framebuffer row matching destination row 0; each destination row
               * then subtracts (int)(row * yrel) from it. */
4913         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4914         /* I have to process this row by row to swap the image,
4915          * otherwise it would be upside down, so stretching in y direction
4916          * doesn't cost extra time
4917          *
4918          * However, stretching in x direction can be avoided if not necessary
4919          */
4920         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4921             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4922             {
4923                 /* Well, that stuff works, but it's very slow.
4924                  * find a better way instead
4925                  */
4926                 UINT col;
4927
                      /* NOTE(review): col already starts at dst_rect.left, yet the
                       * x offset passed below is dst_rect.left + col and the source
                       * x is src_rect->left + col * xrel. For dst_rect.left != 0
                       * that counts the left edge twice - confirm whether this is
                       * intended. */
4928                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4929                 {
4930                     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4931                             dst_rect.left + col /* x offset */, row /* y offset */,
4932                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4933                 }
4934             }
4935             else
4936             {
                      /* No x stretching: one call copies a full destination row. */
4937                 gl_info->gl_ops.gl.p_glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4938                         dst_rect.left /* x offset */, row /* y offset */,
4939                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4940             }
4941         }
4942     }
4943     checkGLcall("glCopyTexSubImage2D");
4944
4945     context_release(context);
4946
4947     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4948      * path is never entered
4949      */
4950     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4951 }
4952
4953 /* Uses the hardware to stretch and flip the image
      *
      * Outline of what the function does:
      *  - copy the source framebuffer (fbwidth x fbheight) into a texture,
      *  - draw that texture as a quad of the destination size at the origin of
      *    a scratch draw buffer (GL_AUX1 / GL_AUX0 when the context reports aux
      *    buffers that can be used, otherwise GL_BACK),
      *  - copy the drawn area into dst_surface's texture,
      *  - when GL_BACK served as scratch buffer, draw the saved framebuffer
      *    copy over it again.
      *
      * dst_rect_in is copied into a local so top/bottom can be swapped when
      * top > bottom. 'filter' selects the mag/min filter applied to the texture
      * the first framebuffer copy goes into. Before returning, dst_surface is
      * passed to surface_modify_location() with SFLAG_INTEXTURE. */
4954 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4955         const RECT *src_rect, const RECT *dst_rect_in, enum wined3d_texture_filter_type filter)
4956 {
4957     struct wined3d_device *device = dst_surface->resource.device;
4958     struct wined3d_swapchain *src_swapchain = NULL;
          /* 'src' only records which texture name acts as the quad's source so the
           * cleanup at the end can tell whether it was a temporary; 'backup' stays
           * 0 unless a temporary backup texture is generated. */
4959     GLuint src, backup = 0;
4960     float left, right, top, bottom; /* Texture coordinates */
4961     UINT fbwidth = src_surface->resource.width;
4962     UINT fbheight = src_surface->resource.height;
4963     const struct wined3d_gl_info *gl_info;
4964     struct wined3d_context *context;
4965     GLenum drawBuffer = GL_BACK;
4966     GLenum texture_target;
4967     BOOL noBackBufferBackup;
4968     BOOL src_offscreen;
4969     BOOL upsidedown = FALSE;
4970     RECT dst_rect = *dst_rect_in;
4971
4972     TRACE("Using hwstretch blit\n");
4973     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4974     context = context_acquire(device, src_surface);
4975     gl_info = context->gl_info;
4976     context_apply_blit_state(context, device);
4977     surface_internal_preload(dst_surface, SRGB_RGB);
4978
4979     src_offscreen = surface_is_offscreen(src_surface);
4980     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4981     if (!noBackBufferBackup && !src_surface->texture_name)
4982     {
4983         /* Get it a description */
4984         surface_internal_preload(src_surface, SRGB_RGB);
4985     }
4986
4987     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4988      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4989      */
4990     if (context->aux_buffers >= 2)
4991     {
4992         /* Got more than one aux buffer? Use the 2nd aux buffer */
4993         drawBuffer = GL_AUX1;
4994     }
4995     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4996     {
4997         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4998         drawBuffer = GL_AUX0;
4999     }
5000
5001     if (noBackBufferBackup)
5002     {
              /* A temporary GL_TEXTURE_2D texture receives the framebuffer copy
               * instead of the source surface's own texture. NOTE(review): no
               * glTexImage2D for 'backup' is visible in this function before the
               * glCopyTexSubImage2D further down - confirm where its storage is
               * set up. */
5003         gl_info->gl_ops.gl.p_glGenTextures(1, &backup);
5004         checkGLcall("glGenTextures");
5005         context_bind_texture(context, GL_TEXTURE_2D, backup);
5006         texture_target = GL_TEXTURE_2D;
5007     }
5008     else
5009     {
5010         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5011          * we are reading from the back buffer, the backup can be used as source texture
5012          */
5013         texture_target = src_surface->texture_target;
5014         context_bind_texture(context, texture_target, src_surface->texture_name);
5015         gl_info->gl_ops.gl.p_glEnable(texture_target);
5016         checkGLcall("glEnable(texture_target)");
5017
5018         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5019         src_surface->flags &= ~SFLAG_INTEXTURE;
5020     }
5021
5022     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5023      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5024      */
5025     if(dst_rect.top > dst_rect.bottom) {
5026         UINT tmp = dst_rect.bottom;
5027         dst_rect.bottom = dst_rect.top;
5028         dst_rect.top = tmp;
5029         upsidedown = TRUE;
5030     }
5031
          /* Select the buffer to read from; an offscreen source also inverts the
           * upside-down flag. */
5032     if (src_offscreen)
5033     {
5034         TRACE("Reading from an offscreen target\n");
5035         upsidedown = !upsidedown;
5036         gl_info->gl_ops.gl.p_glReadBuffer(device->offscreenBuffer);
5037     }
5038     else
5039     {
5040         gl_info->gl_ops.gl.p_glReadBuffer(surface_get_gl_buffer(src_surface));
5041     }
5042
5043     /* TODO: Only back up the part that will be overwritten */
5044     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(texture_target, 0, 0, 0, 0, 0, fbwidth, fbheight);
5045
5046     checkGLcall("glCopyTexSubImage2D");
5047
5048     /* No issue with overriding these - the sampler is dirty due to blit usage */
5049     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5050             wined3d_gl_mag_filter(magLookup, filter));
5051     checkGLcall("glTexParameteri");
5052     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5053             wined3d_gl_min_mip_filter(minMipLookup, filter, WINED3D_TEXF_NONE));
5054     checkGLcall("glTexParameteri");
5055
5056     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5057         src_swapchain = src_surface->container.u.swapchain;
5058     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5059     {
              /* Reuse the texture filled by the framebuffer copy above; it is
               * still the bound texture at this point. */
5060         src = backup ? backup : src_surface->texture_name;
5061     }
5062     else
5063     {
              /* Any other swapchain surface: read GL_FRONT into a freshly generated
               * GL_TEXTURE_2D texture, which becomes the bound source of the quad
               * and is deleted in the cleanup section. */
5064         gl_info->gl_ops.gl.p_glReadBuffer(GL_FRONT);
5065         checkGLcall("glReadBuffer(GL_FRONT)");
5066
5067         gl_info->gl_ops.gl.p_glGenTextures(1, &src);
5068         checkGLcall("glGenTextures(1, &src)");
5069         context_bind_texture(context, GL_TEXTURE_2D, src);
5070
5071         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5072          * out for power of 2 sizes
5073          */
5074         gl_info->gl_ops.gl.p_glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5075                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5076         checkGLcall("glTexImage2D");
5077         gl_info->gl_ops.gl.p_glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, fbwidth, fbheight);
5078
5079         gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5080         checkGLcall("glTexParameteri");
5081         gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5082         checkGLcall("glTexParameteri");
5083
5084         gl_info->gl_ops.gl.p_glReadBuffer(GL_BACK);
5085         checkGLcall("glReadBuffer(GL_BACK)");
5086
5087         if (texture_target != GL_TEXTURE_2D)
5088         {
5089             gl_info->gl_ops.gl.p_glDisable(texture_target);
5090             gl_info->gl_ops.gl.p_glEnable(GL_TEXTURE_2D);
5091             texture_target = GL_TEXTURE_2D;
5092         }
5093     }
5094     checkGLcall("glEnd and previous");
5095
          /* Texture coordinates of the source rectangle. The vertical pair is
           * measured from the bottom of the source surface and is exchanged when
           * the image has to be flipped. */
5096     left = src_rect->left;
5097     right = src_rect->right;
5098
5099     if (!upsidedown)
5100     {
5101         top = src_surface->resource.height - src_rect->top;
5102         bottom = src_surface->resource.height - src_rect->bottom;
5103     }
5104     else
5105     {
5106         top = src_surface->resource.height - src_rect->bottom;
5107         bottom = src_surface->resource.height - src_rect->top;
5108     }
5109
          /* With SFLAG_NORMCOORD set the coordinates are scaled by the power-of-two
           * texture size; otherwise they are passed on in pixels. */
5110     if (src_surface->flags & SFLAG_NORMCOORD)
5111     {
5112         left /= src_surface->pow2Width;
5113         right /= src_surface->pow2Width;
5114         top /= src_surface->pow2Height;
5115         bottom /= src_surface->pow2Height;
5116     }
5117
5118     /* draw the source texture stretched and upside down. The correct surface is bound already */
5119     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5120     gl_info->gl_ops.gl.p_glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5121
          /* The scratch buffer is both drawn to and, for the copy into the
           * destination texture below, read from. */
5122     context_set_draw_buffer(context, drawBuffer);
5123     gl_info->gl_ops.gl.p_glReadBuffer(drawBuffer);
5124
5125     gl_info->gl_ops.gl.p_glBegin(GL_QUADS);
5126         /* bottom left */
5127         gl_info->gl_ops.gl.p_glTexCoord2f(left, bottom);
5128         gl_info->gl_ops.gl.p_glVertex2i(0, 0);
5129
5130         /* top left */
5131         gl_info->gl_ops.gl.p_glTexCoord2f(left, top);
5132         gl_info->gl_ops.gl.p_glVertex2i(0, dst_rect.bottom - dst_rect.top);
5133
5134         /* top right */
5135         gl_info->gl_ops.gl.p_glTexCoord2f(right, top);
5136         gl_info->gl_ops.gl.p_glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5137
5138         /* bottom right */
5139         gl_info->gl_ops.gl.p_glTexCoord2f(right, bottom);
5140         gl_info->gl_ops.gl.p_glVertex2i(dst_rect.right - dst_rect.left, 0);
5141     gl_info->gl_ops.gl.p_glEnd();
5142     checkGLcall("glEnd and previous");
5143
5144     if (texture_target != dst_surface->texture_target)
5145     {
5146         gl_info->gl_ops.gl.p_glDisable(texture_target);
5147         gl_info->gl_ops.gl.p_glEnable(dst_surface->texture_target);
5148         texture_target = dst_surface->texture_target;
5149     }
5150
5151     /* Now read the stretched and upside down image into the destination texture */
5152     context_bind_texture(context, texture_target, dst_surface->texture_name);
5153     gl_info->gl_ops.gl.p_glCopyTexSubImage2D(texture_target,
5154                         0,
5155                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5156                         0, 0, /* We blitted the image to the origin */
5157                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5158     checkGLcall("glCopyTexSubImage2D");
5159
5160     if (drawBuffer == GL_BACK)
5161     {
5162         /* Write the back buffer backup back. */
5163         if (backup)
5164         {
5165             if (texture_target != GL_TEXTURE_2D)
5166             {
5167                 gl_info->gl_ops.gl.p_glDisable(texture_target);
5168                 gl_info->gl_ops.gl.p_glEnable(GL_TEXTURE_2D);
5169                 texture_target = GL_TEXTURE_2D;
5170             }
5171             context_bind_texture(context, GL_TEXTURE_2D, backup);
5172         }
5173         else
5174         {
5175             if (texture_target != src_surface->texture_target)
5176             {
5177                 gl_info->gl_ops.gl.p_glDisable(texture_target);
5178                 gl_info->gl_ops.gl.p_glEnable(src_surface->texture_target);
5179                 texture_target = src_surface->texture_target;
5180             }
5181             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5182         }
5183
              /* Full-framebuffer quad; the texture coordinates cover the
               * fbwidth x fbheight part of the pow2-sized texture. */
5184         gl_info->gl_ops.gl.p_glBegin(GL_QUADS);
5185             /* top left */
5186             gl_info->gl_ops.gl.p_glTexCoord2f(0.0f, 0.0f);
5187             gl_info->gl_ops.gl.p_glVertex2i(0, fbheight);
5188
5189             /* bottom left */
5190             gl_info->gl_ops.gl.p_glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5191             gl_info->gl_ops.gl.p_glVertex2i(0, 0);
5192
5193             /* bottom right */
5194             gl_info->gl_ops.gl.p_glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5195                     (float)fbheight / (float)src_surface->pow2Height);
5196             gl_info->gl_ops.gl.p_glVertex2i(fbwidth, 0);
5197
5198             /* top right */
5199             gl_info->gl_ops.gl.p_glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5200             gl_info->gl_ops.gl.p_glVertex2i(fbwidth, fbheight);
5201         gl_info->gl_ops.gl.p_glEnd();
5202     }
5203     gl_info->gl_ops.gl.p_glDisable(texture_target);
5204     checkGLcall("glDisable(texture_target)");
5205
5206     /* Cleanup: 'src' is deleted only when it is neither the source surface's
          * own texture nor the backup texture, which is deleted separately. */
5207     if (src != src_surface->texture_name && src != backup)
5208     {
5209         gl_info->gl_ops.gl.p_glDeleteTextures(1, &src);
5210         checkGLcall("glDeleteTextures(1, &src)");
5211     }
5212     if (backup)
5213     {
5214         gl_info->gl_ops.gl.p_glDeleteTextures(1, &backup);
5215         checkGLcall("glDeleteTextures(1, &backup)");
5216     }
5217
5218     if (wined3d_settings.strict_draw_ordering)
5219         gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5220
5221     context_release(context);
5222
5223     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5224      * path is never entered
5225      */
5226     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5227 }
5228
5229 /* Front buffer coordinates are always full screen coordinates, but our GL
5230  * drawable is limited to the window's client area. The sysmem and texture
5231  * copies do have the full screen size. Note that GL has a bottom-left
5232  * origin, while D3D has a top-left origin.
      *
      * surface: surface the rectangle refers to; only read.
      * window:  window used for the screen -> client conversion; only consulted
      *          when 'surface' is the front buffer of its swapchain.
      * rect:    rectangle converted in place. For the front buffer it is first
      *          shifted by the client-area position of screen point (0, 0).
      *          In every case top and bottom are then replaced by their
      *          distance from the drawable's height; left and right are not
      *          modified by that last step. */
5233 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5234 {
5235     UINT drawable_height;
5236
5237     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5238             && surface == surface->container.u.swapchain->front_buffer)
5239     {
5240         POINT offset = {0, 0};
5241         RECT windowsize;
5242
              /* Converting the screen origin yields the offset to add to a
               * screen-space rectangle to express it in client coordinates. */
5243         ScreenToClient(window, &offset);
5244         OffsetRect(rect, offset.x, offset.y);
5245
              /* The drawable height is the height of the client area. */
5246         GetClientRect(window, &windowsize);
5247         drawable_height = windowsize.bottom - windowsize.top;
5248     }
5249     else
5250     {
5251         drawable_height = surface->resource.height;
5252     }
5253
          /* Flip the vertical axis from a top-left to a bottom-left origin. */
5254     rect->top = drawable_height - rect->top;
5255     rect->bottom = drawable_height - rect->bottom;
5256 }
5257
5258 static void surface_blt_to_drawable(const struct wined3d_device *device,
5259         enum wined3d_texture_filter_type filter, BOOL color_key,
5260         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5261         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5262 {
          /* Draws src_surface as a textured quad into the drawable of dst_surface
           * through draw_textured_quad().
           *
           * filter:      passed on unchanged to draw_textured_quad().
           * color_key:   when TRUE, GL_ALPHA_TEST is enabled around the draw so
           *              that fragments whose alpha equals the reference value
           *              are rejected (GL_NOTEQUAL).
           * src_rect_in, dst_rect_in: copied into locals; only the destination
           *              copy is modified, and only for onscreen destinations. */
5263     const struct wined3d_gl_info *gl_info;
5264     struct wined3d_context *context;
5265     RECT src_rect, dst_rect;
5266
5267     src_rect = *src_rect_in;
5268     dst_rect = *dst_rect_in;
5269
5270     /* Make sure the surface is up-to-date. This should probably use
5271      * surface_load_location() and worry about the destination surface too,
5272      * unless we're overwriting it completely. */
5273     surface_internal_preload(src_surface, SRGB_RGB);
5274
5275     /* Activate the destination context, set it up for blitting */
5276     context = context_acquire(device, dst_surface);
5277     gl_info = context->gl_info;
5278     context_apply_blit_state(context, device);
5279
          /* Onscreen destinations get their rectangle converted to drawable
           * coordinates relative to the context's window. */
5280     if (!surface_is_offscreen(dst_surface))
5281         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5282
5283     device->blitter->set_shader(device->blit_priv, context, src_surface);
5284
5285     if (color_key)
5286     {
5287         gl_info->gl_ops.gl.p_glEnable(GL_ALPHA_TEST);
5288         checkGLcall("glEnable(GL_ALPHA_TEST)");
5289
5290         /* When the primary render target uses P8, the alpha component
5291          * contains the palette index. Which means that the colorkey is one of
5292          * the palette entries. In other cases pixels that should be masked
5293          * away have alpha set to 0. */
5294         if (primary_render_target_is_p8(device))
5295             gl_info->gl_ops.gl.p_glAlphaFunc(GL_NOTEQUAL,
5296                     (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5297         else
5298             gl_info->gl_ops.gl.p_glAlphaFunc(GL_NOTEQUAL, 0.0f);
5299         checkGLcall("glAlphaFunc");
5300     }
5301     else
5302     {
5303         gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5304         checkGLcall("glDisable(GL_ALPHA_TEST)");
5305     }
5306
5307     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5308
          /* The alpha test is switched off again so it is disabled on both paths
           * once the draw is done. */
5309     if (color_key)
5310     {
5311         gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5312         checkGLcall("glDisable(GL_ALPHA_TEST)");
5313     }
5314
5315     /* Leave the opengl state valid for blitting */
5316     device->blitter->unset_shader(context->gl_info);
5317
          /* Flush when strict draw ordering is configured, and always when the
           * destination is the front buffer of its swapchain. */
5318     if (wined3d_settings.strict_draw_ordering
5319             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5320             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5321         gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5322
5323     context_release(context);
5324 }
5325
5326 /* Do not call while under the GL lock.
      *
      * Fills 'rect' of surface 's' with 'color' by delegating to a blitter.
      * The blitter is chosen by wined3d_select_blitter() for
      * WINED3D_BLIT_OP_COLOR_FILL from the adapter's GL info and the surface's
      * usage, pool and format; no source surface or source rectangle is passed.
      *
      * Returns WINED3DERR_INVALIDCALL (after a FIXME) when no blitter is
      * selected, otherwise whatever the blitter's color_fill() returns. */
5327 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5328 {
5329     struct wined3d_device *device = s->resource.device;
5330     const struct blit_shader *blitter;
5331
5332     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5333             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5334     if (!blitter)
5335     {
5336         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5337         return WINED3DERR_INVALIDCALL;
5338     }
5339
5340     return blitter->color_fill(device, s, rect, color);
5341 }
5342
5343 /* Do not call while under the GL lock. */
5344 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5345         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5346         enum wined3d_texture_filter_type filter)
5347 {
5348     struct wined3d_device *device = dst_surface->resource.device;
5349     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5350     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5351
5352     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5353             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5354             flags, DDBltFx, debug_d3dtexturefiltertype(filter));
5355
5356     /* Get the swapchain. One of the surfaces has to be a primary surface */
5357     if (dst_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5358     {
5359         WARN("Destination is in sysmem, rejecting gl blt\n");
5360         return WINED3DERR_INVALIDCALL;
5361     }
5362
5363     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5364         dstSwapchain = dst_surface->container.u.swapchain;
5365
5366     if (src_surface)
5367     {
5368         if (src_surface->resource.pool == WINED3D_POOL_SYSTEM_MEM)
5369         {
5370             WARN("Src is in sysmem, rejecting gl blt\n");
5371             return WINED3DERR_INVALIDCALL;
5372         }
5373
5374         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5375             srcSwapchain = src_surface->container.u.swapchain;
5376     }
5377
5378     /* Early sort out of cases where no render target is used */
5379     if (!dstSwapchain && !srcSwapchain
5380             && src_surface != device->fb.render_targets[0]
5381             && dst_surface != device->fb.render_targets[0])
5382     {
5383         TRACE("No surface is render target, not using hardware blit.\n");
5384         return WINED3DERR_INVALIDCALL;
5385     }
5386
5387     /* No destination color keying supported */
5388     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5389     {
5390         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5391         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5392         return WINED3DERR_INVALIDCALL;
5393     }
5394
5395     if (dstSwapchain && dstSwapchain == srcSwapchain)
5396     {
5397         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5398         return WINED3DERR_INVALIDCALL;
5399     }
5400
5401     if (dstSwapchain && srcSwapchain)
5402     {
5403         FIXME("Implement hardware blit between two different swapchains\n");
5404         return WINED3DERR_INVALIDCALL;
5405     }
5406
5407     if (dstSwapchain)
5408     {
5409         /* Handled with regular texture -> swapchain blit */
5410         if (src_surface == device->fb.render_targets[0])
5411             TRACE("Blit from active render target to a swapchain\n");
5412     }
5413     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5414     {
5415         FIXME("Implement blit from a swapchain to the active render target\n");
5416         return WINED3DERR_INVALIDCALL;
5417     }
5418
5419     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5420     {
5421         /* Blit from render target to texture */
5422         BOOL stretchx;
5423
5424         /* P8 read back is not implemented */
5425         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5426                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5427         {
5428             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5429             return WINED3DERR_INVALIDCALL;
5430         }
5431
5432         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5433         {
5434             TRACE("Color keying not supported by frame buffer to texture blit\n");
5435             return WINED3DERR_INVALIDCALL;
5436             /* Destination color key is checked above */
5437         }
5438
5439         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5440             stretchx = TRUE;
5441         else
5442             stretchx = FALSE;
5443
5444         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5445          * flip the image nor scale it.
5446          *
5447          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5448          * -> If the app wants a image width an unscaled width, copy it line per line
5449          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5450          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5451          *    back buffer. This is slower than reading line per line, thus not used for flipping
5452          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5453          *    pixel by pixel. */
5454         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5455                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5456         {
5457             TRACE("No stretching in x direction, using direct framebuffer -> texture copy.\n");
5458             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, filter);
5459         }
5460         else
5461         {
5462             TRACE("Using hardware stretching to flip / stretch the texture.\n");
5463             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, filter);
5464         }
5465
5466         if (!dst_surface->resource.map_count && !(dst_surface->flags & SFLAG_DONOTFREE))
5467         {
5468             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5469             dst_surface->resource.allocatedMemory = NULL;
5470             dst_surface->resource.heapMemory = NULL;
5471         }
5472         else
5473         {
5474             dst_surface->flags &= ~SFLAG_INSYSMEM;
5475         }
5476
5477         return WINED3D_OK;
5478     }
5479     else if (src_surface)
5480     {
5481         /* Blit from offscreen surface to render target */
5482         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5483         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5484
5485         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5486
5487         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5488                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5489                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5490         {
5491             FIXME("Unsupported blit operation falling back to software\n");
5492             return WINED3DERR_INVALIDCALL;
5493         }
5494
5495         /* Color keying: Check if we have to do a color keyed blt,
5496          * and if not check if a color key is activated.
5497          *
5498          * Just modify the color keying parameters in the surface and restore them afterwards
5499          * The surface keeps track of the color key last used to load the opengl surface.
5500          * PreLoad will catch the change to the flags and color key and reload if necessary.
5501          */
5502         if (flags & WINEDDBLT_KEYSRC)
5503         {
5504             /* Use color key from surface */
5505         }
5506         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5507         {
5508             /* Use color key from DDBltFx */
5509             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5510             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5511         }
5512         else
5513         {
5514             /* Do not use color key */
5515             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5516         }
5517
5518         surface_blt_to_drawable(device, filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5519                 src_surface, src_rect, dst_surface, dst_rect);
5520
5521         /* Restore the color key parameters */
5522         src_surface->CKeyFlags = oldCKeyFlags;
5523         src_surface->src_blt_color_key = old_blt_key;
5524
5525         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5526
5527         return WINED3D_OK;
5528     }
5529
5530     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5531     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5532     return WINED3DERR_INVALIDCALL;
5533 }
5534
5535 /* Context activation is done by the caller. */
5536 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5537         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5538 {
5539     struct wined3d_device *device = surface->resource.device;
5540     const struct wined3d_gl_info *gl_info = context->gl_info;
5541     GLint compare_mode = GL_NONE;
5542     struct blt_info info;
5543     GLint old_binding = 0;
5544     RECT rect;
5545
5546     gl_info->gl_ops.gl.p_glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5547
5548     gl_info->gl_ops.gl.p_glDisable(GL_CULL_FACE);
5549     gl_info->gl_ops.gl.p_glDisable(GL_BLEND);
5550     gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5551     gl_info->gl_ops.gl.p_glDisable(GL_SCISSOR_TEST);
5552     gl_info->gl_ops.gl.p_glDisable(GL_STENCIL_TEST);
5553     gl_info->gl_ops.gl.p_glEnable(GL_DEPTH_TEST);
5554     gl_info->gl_ops.gl.p_glDepthFunc(GL_ALWAYS);
5555     gl_info->gl_ops.gl.p_glDepthMask(GL_TRUE);
5556     gl_info->gl_ops.gl.p_glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5557     gl_info->gl_ops.gl.p_glViewport(x, y, w, h);
5558     gl_info->gl_ops.gl.p_glDepthRange(0.0, 1.0);
5559
5560     SetRect(&rect, 0, h, w, 0);
5561     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5562     context_active_texture(context, context->gl_info, 0);
5563     gl_info->gl_ops.gl.p_glGetIntegerv(info.binding, &old_binding);
5564     gl_info->gl_ops.gl.p_glBindTexture(info.bind_target, texture);
5565     if (gl_info->supported[ARB_SHADOW])
5566     {
5567         gl_info->gl_ops.gl.p_glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5568         if (compare_mode != GL_NONE)
5569             gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5570     }
5571
5572     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5573             gl_info, info.tex_type, &surface->ds_current_size);
5574
5575     gl_info->gl_ops.gl.p_glBegin(GL_TRIANGLE_STRIP);
5576     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[0]);
5577     gl_info->gl_ops.gl.p_glVertex2f(-1.0f, -1.0f);
5578     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[1]);
5579     gl_info->gl_ops.gl.p_glVertex2f(1.0f, -1.0f);
5580     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[2]);
5581     gl_info->gl_ops.gl.p_glVertex2f(-1.0f, 1.0f);
5582     gl_info->gl_ops.gl.p_glTexCoord3fv(info.coords[3]);
5583     gl_info->gl_ops.gl.p_glVertex2f(1.0f, 1.0f);
5584     gl_info->gl_ops.gl.p_glEnd();
5585
5586     if (compare_mode != GL_NONE)
5587         gl_info->gl_ops.gl.p_glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5588     gl_info->gl_ops.gl.p_glBindTexture(info.bind_target, old_binding);
5589
5590     gl_info->gl_ops.gl.p_glPopAttrib();
5591
5592     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5593 }
5594
5595 void surface_modify_ds_location(struct wined3d_surface *surface,
5596         DWORD location, UINT w, UINT h)
5597 {
5598     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5599
5600     if (location & ~(SFLAG_LOCATIONS | SFLAG_DISCARDED))
5601         FIXME("Invalid location (%#x) specified.\n", location);
5602
5603     if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5604             || (!(surface->flags & SFLAG_INTEXTURE) && (location & SFLAG_INTEXTURE)))
5605     {
5606         if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5607         {
5608             TRACE("Passing to container.\n");
5609             wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5610         }
5611     }
5612
5613     surface->ds_current_size.cx = w;
5614     surface->ds_current_size.cy = h;
5615     surface->flags &= ~(SFLAG_LOCATIONS | SFLAG_DISCARDED);
5616     surface->flags |= location;
5617 }
5618
5619 /* Context activation is done by the caller. */
5620 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5621 {
5622     const struct wined3d_gl_info *gl_info = context->gl_info;
5623     struct wined3d_device *device = surface->resource.device;
5624     GLsizei w, h;
5625
5626     TRACE("surface %p, new location %#x.\n", surface, location);
5627
5628     /* TODO: Make this work for modes other than FBO */
5629     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5630
5631     if (!(surface->flags & location))
5632     {
5633         w = surface->ds_current_size.cx;
5634         h = surface->ds_current_size.cy;
5635         surface->ds_current_size.cx = 0;
5636         surface->ds_current_size.cy = 0;
5637     }
5638     else
5639     {
5640         w = surface->resource.width;
5641         h = surface->resource.height;
5642     }
5643
5644     if (surface->ds_current_size.cx == surface->resource.width
5645             && surface->ds_current_size.cy == surface->resource.height)
5646     {
5647         TRACE("Location (%#x) is already up to date.\n", location);
5648         return;
5649     }
5650
5651     if (surface->current_renderbuffer)
5652     {
5653         FIXME("Not supported with fixed up depth stencil.\n");
5654         return;
5655     }
5656
5657     if (surface->flags & SFLAG_DISCARDED)
5658     {
5659         TRACE("Surface was discarded, no need copy data.\n");
5660         switch (location)
5661         {
5662             case SFLAG_INTEXTURE:
5663                 surface_prepare_texture(surface, context, FALSE);
5664                 break;
5665             case SFLAG_INRB_MULTISAMPLE:
5666                 surface_prepare_rb(surface, gl_info, TRUE);
5667                 break;
5668             case SFLAG_INDRAWABLE:
5669                 /* Nothing to do */
5670                 break;
5671             default:
5672                 FIXME("Unhandled location %#x\n", location);
5673         }
5674         surface->flags &= ~SFLAG_DISCARDED;
5675         surface->flags |= location;
5676         surface->ds_current_size.cx = surface->resource.width;
5677         surface->ds_current_size.cy = surface->resource.height;
5678         return;
5679     }
5680
5681     if (!(surface->flags & SFLAG_LOCATIONS))
5682     {
5683         FIXME("No up to date depth stencil location.\n");
5684         surface->flags |= location;
5685         surface->ds_current_size.cx = surface->resource.width;
5686         surface->ds_current_size.cy = surface->resource.height;
5687         return;
5688     }
5689
5690     if (location == SFLAG_INTEXTURE)
5691     {
5692         GLint old_binding = 0;
5693         GLenum bind_target;
5694
5695         /* The render target is allowed to be smaller than the depth/stencil
5696          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5697          * than the offscreen surface. Don't overwrite the offscreen surface
5698          * with undefined data. */
5699         w = min(w, context->swapchain->desc.backbuffer_width);
5700         h = min(h, context->swapchain->desc.backbuffer_height);
5701
5702         TRACE("Copying onscreen depth buffer to depth texture.\n");
5703
5704         if (!device->depth_blt_texture)
5705             gl_info->gl_ops.gl.p_glGenTextures(1, &device->depth_blt_texture);
5706
5707         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5708          * directly on the FBO texture. That's because we need to flip. */
5709         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5710                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5711         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5712         {
5713             gl_info->gl_ops.gl.p_glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5714             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5715         }
5716         else
5717         {
5718             gl_info->gl_ops.gl.p_glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5719             bind_target = GL_TEXTURE_2D;
5720         }
5721         gl_info->gl_ops.gl.p_glBindTexture(bind_target, device->depth_blt_texture);
5722         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5723          * internal format, because the internal format might include stencil
5724          * data. In principle we should copy stencil data as well, but unless
5725          * the driver supports stencil export it's hard to do, and doesn't
5726          * seem to be needed in practice. If the hardware doesn't support
5727          * writing stencil data, the glCopyTexImage2D() call might trigger
5728          * software fallbacks. */
5729         gl_info->gl_ops.gl.p_glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5730         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5731         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5732         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5733         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5734         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5735         gl_info->gl_ops.gl.p_glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5736         gl_info->gl_ops.gl.p_glBindTexture(bind_target, old_binding);
5737
5738         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5739                 NULL, surface, SFLAG_INTEXTURE);
5740         context_set_draw_buffer(context, GL_NONE);
5741         gl_info->gl_ops.gl.p_glReadBuffer(GL_NONE);
5742
5743         /* Do the actual blit */
5744         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5745         checkGLcall("depth_blt");
5746
5747         context_invalidate_state(context, STATE_FRAMEBUFFER);
5748
5749         if (wined3d_settings.strict_draw_ordering)
5750             gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5751     }
5752     else if (location == SFLAG_INDRAWABLE)
5753     {
5754         TRACE("Copying depth texture to onscreen depth buffer.\n");
5755
5756         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5757                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5758         surface_depth_blt(surface, context, surface->texture_name,
5759                 0, surface->pow2Height - h, w, h, surface->texture_target);
5760         checkGLcall("depth_blt");
5761
5762         context_invalidate_state(context, STATE_FRAMEBUFFER);
5763
5764         if (wined3d_settings.strict_draw_ordering)
5765             gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
5766     }
5767     else
5768     {
5769         ERR("Invalid location (%#x) specified.\n", location);
5770     }
5771
5772     surface->flags |= location;
5773     surface->ds_current_size.cx = surface->resource.width;
5774     surface->ds_current_size.cy = surface->resource.height;
5775 }
5776
5777 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5778 {
5779     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5780     struct wined3d_surface *overlay;
5781
5782     TRACE("surface %p, location %s, persistent %#x.\n",
5783             surface, debug_surflocation(location), persistent);
5784
5785     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5786             && !(surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
5787             && (location & SFLAG_INDRAWABLE))
5788         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5789
5790     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5791             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5792         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5793
5794     if (persistent)
5795     {
5796         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5797                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5798         {
5799             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5800             {
5801                 TRACE("Passing to container.\n");
5802                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5803             }
5804         }
5805         surface->flags &= ~SFLAG_LOCATIONS;
5806         surface->flags |= location;
5807
5808         /* Redraw emulated overlays, if any */
5809         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5810         {
5811             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5812             {
5813                 surface_draw_overlay(overlay);
5814             }
5815         }
5816     }
5817     else
5818     {
5819         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5820         {
5821             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5822             {
5823                 TRACE("Passing to container\n");
5824                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5825             }
5826         }
5827         surface->flags &= ~location;
5828     }
5829
5830     if (!(surface->flags & SFLAG_LOCATIONS))
5831     {
5832         ERR("Surface %p does not have any up to date location.\n", surface);
5833     }
5834 }
5835
5836 static DWORD resource_access_from_location(DWORD location)
5837 {
5838     switch (location)
5839     {
5840         case SFLAG_INSYSMEM:
5841             return WINED3D_RESOURCE_ACCESS_CPU;
5842
5843         case SFLAG_INDRAWABLE:
5844         case SFLAG_INSRGBTEX:
5845         case SFLAG_INTEXTURE:
5846         case SFLAG_INRB_MULTISAMPLE:
5847         case SFLAG_INRB_RESOLVED:
5848             return WINED3D_RESOURCE_ACCESS_GPU;
5849
5850         default:
5851             FIXME("Unhandled location %#x.\n", location);
5852             return 0;
5853     }
5854 }
5855
5856 static void surface_load_sysmem(struct wined3d_surface *surface,
5857         const struct wined3d_gl_info *gl_info, const RECT *rect)
5858 {
5859     surface_prepare_system_memory(surface);
5860
5861     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5862         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5863
5864     /* Download the surface to system memory. */
5865     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5866     {
5867         struct wined3d_device *device = surface->resource.device;
5868         struct wined3d_context *context;
5869
5870         /* TODO: Use already acquired context when possible. */
5871         context = context_acquire(device, NULL);
5872
5873         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5874         surface_download_data(surface, gl_info);
5875
5876         context_release(context);
5877
5878         return;
5879     }
5880
5881     if (surface->flags & SFLAG_INDRAWABLE)
5882     {
5883         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5884                 wined3d_surface_get_pitch(surface));
5885         return;
5886     }
5887
5888     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5889             surface, surface->flags & SFLAG_LOCATIONS);
5890 }
5891
5892 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5893         const struct wined3d_gl_info *gl_info, const RECT *rect)
5894 {
5895     struct wined3d_device *device = surface->resource.device;
5896     enum wined3d_conversion_type convert;
5897     struct wined3d_format format;
5898     UINT byte_count;
5899     BYTE *mem;
5900
5901     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5902     {
5903         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5904         return WINED3DERR_INVALIDCALL;
5905     }
5906
5907     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5908         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5909
5910     if (surface->flags & SFLAG_INTEXTURE)
5911     {
5912         RECT r;
5913
5914         surface_get_rect(surface, rect, &r);
5915         surface_blt_to_drawable(device, WINED3D_TEXF_POINT, FALSE, surface, &r, surface, &r);
5916
5917         return WINED3D_OK;
5918     }
5919
5920     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5921     {
5922         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5923          * path through sysmem. */
5924         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5925     }
5926
5927     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5928
5929     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5930      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5931      * called. */
5932     if ((convert != WINED3D_CT_NONE) && (surface->flags & SFLAG_PBO))
5933     {
5934         struct wined3d_context *context;
5935
5936         TRACE("Removing the pbo attached to surface %p.\n", surface);
5937
5938         /* TODO: Use already acquired context when possible. */
5939         context = context_acquire(device, NULL);
5940
5941         surface_remove_pbo(surface, gl_info);
5942
5943         context_release(context);
5944     }
5945
5946     if ((convert != WINED3D_CT_NONE) && surface->resource.allocatedMemory)
5947     {
5948         UINT height = surface->resource.height;
5949         UINT width = surface->resource.width;
5950         UINT src_pitch, dst_pitch;
5951
5952         byte_count = format.conv_byte_count;
5953         src_pitch = wined3d_surface_get_pitch(surface);
5954
5955         /* Stick to the alignment for the converted surface too, makes it
5956          * easier to load the surface. */
5957         dst_pitch = width * byte_count;
5958         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5959
5960         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5961         {
5962             ERR("Out of memory (%u).\n", dst_pitch * height);
5963             return E_OUTOFMEMORY;
5964         }
5965
5966         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5967                 src_pitch, width, height, dst_pitch, convert, surface);
5968
5969         surface->flags |= SFLAG_CONVERTED;
5970     }
5971     else
5972     {
5973         surface->flags &= ~SFLAG_CONVERTED;
5974         mem = surface->resource.allocatedMemory;
5975         byte_count = format.byte_count;
5976     }
5977
5978     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5979
5980     /* Don't delete PBO memory. */
5981     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5982         HeapFree(GetProcessHeap(), 0, mem);
5983
5984     return WINED3D_OK;
5985 }
5986
5987 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5988         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5989 {
5990     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5991     struct wined3d_device *device = surface->resource.device;
5992     enum wined3d_conversion_type convert;
5993     struct wined3d_context *context;
5994     UINT width, src_pitch, dst_pitch;
5995     struct wined3d_bo_address data;
5996     struct wined3d_format format;
5997     POINT dst_point = {0, 0};
5998     BYTE *mem;
5999
6000     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6001             && surface_is_offscreen(surface)
6002             && (surface->flags & SFLAG_INDRAWABLE))
6003     {
6004         surface_load_fb_texture(surface, srgb);
6005
6006         return WINED3D_OK;
6007     }
6008
6009     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6010             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
6011             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6012                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6013                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6014     {
6015         if (srgb)
6016             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INTEXTURE,
6017                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6018         else
6019             surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, SFLAG_INSRGBTEX,
6020                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6021
6022         return WINED3D_OK;
6023     }
6024
6025     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6026             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6027             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6028                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6029                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6030     {
6031         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6032         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
6033         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6034
6035         surface_blt_fbo(device, WINED3D_TEXF_POINT, surface, src_location,
6036                 &rect, surface, dst_location, &rect);
6037
6038         return WINED3D_OK;
6039     }
6040
6041     /* Upload from system memory */
6042
6043     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6044             TRUE /* We will use textures */, &format, &convert);
6045
6046     if (srgb)
6047     {
6048         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6049         {
6050             /* Performance warning... */
6051             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6052             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6053         }
6054     }
6055     else
6056     {
6057         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6058         {
6059             /* Performance warning... */
6060             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6061             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6062         }
6063     }
6064
6065     if (!(surface->flags & SFLAG_INSYSMEM))
6066     {
6067         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6068         /* Lets hope we get it from somewhere... */
6069         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6070     }
6071
6072     /* TODO: Use already acquired context when possible. */
6073     context = context_acquire(device, NULL);
6074
6075     surface_prepare_texture(surface, context, srgb);
6076     surface_bind_and_dirtify(surface, context, srgb);
6077
6078     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6079     {
6080         surface->flags |= SFLAG_GLCKEY;
6081         surface->gl_color_key = surface->src_blt_color_key;
6082     }
6083     else surface->flags &= ~SFLAG_GLCKEY;
6084
6085     width = surface->resource.width;
6086     src_pitch = wined3d_surface_get_pitch(surface);
6087
6088     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6089      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6090      * called. */
6091     if ((convert != WINED3D_CT_NONE || format.convert) && (surface->flags & SFLAG_PBO))
6092     {
6093         TRACE("Removing the pbo attached to surface %p.\n", surface);
6094         surface_remove_pbo(surface, gl_info);
6095     }
6096
6097     if (format.convert)
6098     {
6099         /* This code is entered for texture formats which need a fixup. */
6100         UINT height = surface->resource.height;
6101
6102         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6103         dst_pitch = width * format.conv_byte_count;
6104         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6105
6106         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6107         {
6108             ERR("Out of memory (%u).\n", dst_pitch * height);
6109             context_release(context);
6110             return E_OUTOFMEMORY;
6111         }
6112         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6113         format.byte_count = format.conv_byte_count;
6114         src_pitch = dst_pitch;
6115     }
6116     else if (convert != WINED3D_CT_NONE && surface->resource.allocatedMemory)
6117     {
6118         /* This code is only entered for color keying fixups */
6119         UINT height = surface->resource.height;
6120
6121         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6122         dst_pitch = width * format.conv_byte_count;
6123         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6124
6125         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6126         {
6127             ERR("Out of memory (%u).\n", dst_pitch * height);
6128             context_release(context);
6129             return E_OUTOFMEMORY;
6130         }
6131         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6132                 width, height, dst_pitch, convert, surface);
6133         format.byte_count = format.conv_byte_count;
6134         src_pitch = dst_pitch;
6135     }
6136     else
6137     {
6138         mem = surface->resource.allocatedMemory;
6139     }
6140
6141     data.buffer_object = surface->pbo;
6142     data.addr = mem;
6143     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6144
6145     context_release(context);
6146
6147     /* Don't delete PBO memory. */
6148     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6149         HeapFree(GetProcessHeap(), 0, mem);
6150
6151     return WINED3D_OK;
6152 }
6153
6154 static void surface_multisample_resolve(struct wined3d_surface *surface)
6155 {
6156     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6157
6158     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6159         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6160
6161     surface_blt_fbo(surface->resource.device, WINED3D_TEXF_POINT,
6162             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6163 }
6164
6165 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6166 {
6167     struct wined3d_device *device = surface->resource.device;
6168     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6169     HRESULT hr;
6170
6171     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6172
6173     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6174     {
6175         if (location == SFLAG_INTEXTURE && surface->flags & SFLAG_INDRAWABLE)
6176         {
6177             struct wined3d_context *context = context_acquire(device, NULL);
6178             surface_load_ds_location(surface, context, location);
6179             context_release(context);
6180             return WINED3D_OK;
6181         }
6182         else if (location & surface->flags && surface->draw_binding != SFLAG_INDRAWABLE)
6183         {
6184             /* Already up to date, nothing to do. */
6185             return WINED3D_OK;
6186         }
6187         else
6188         {
6189             FIXME("Unimplemented copy from %s to %s for depth/stencil buffers.\n",
6190                     debug_surflocation(surface->flags & SFLAG_LOCATIONS), debug_surflocation(location));
6191             return WINED3DERR_INVALIDCALL;
6192         }
6193     }
6194
6195     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6196         location = SFLAG_INTEXTURE;
6197
6198     if (surface->flags & location)
6199     {
6200         TRACE("Location already up to date.\n");
6201
6202         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6203                 && surface_need_pbo(surface, gl_info))
6204             surface_load_pbo(surface, gl_info);
6205
6206         return WINED3D_OK;
6207     }
6208
6209     if (WARN_ON(d3d_surface))
6210     {
6211         DWORD required_access = resource_access_from_location(location);
6212         if ((surface->resource.access_flags & required_access) != required_access)
6213             WARN("Operation requires %#x access, but surface only has %#x.\n",
6214                     required_access, surface->resource.access_flags);
6215     }
6216
6217     if (!(surface->flags & SFLAG_LOCATIONS))
6218     {
6219         ERR("Surface %p does not have any up to date location.\n", surface);
6220         surface->flags |= SFLAG_LOST;
6221         return WINED3DERR_DEVICELOST;
6222     }
6223
6224     switch (location)
6225     {
6226         case SFLAG_INSYSMEM:
6227             surface_load_sysmem(surface, gl_info, rect);
6228             break;
6229
6230         case SFLAG_INDRAWABLE:
6231             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6232                 return hr;
6233             break;
6234
6235         case SFLAG_INRB_RESOLVED:
6236             surface_multisample_resolve(surface);
6237             break;
6238
6239         case SFLAG_INTEXTURE:
6240         case SFLAG_INSRGBTEX:
6241             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6242                 return hr;
6243             break;
6244
6245         default:
6246             ERR("Don't know how to handle location %#x.\n", location);
6247             break;
6248     }
6249
6250     if (!rect)
6251     {
6252         surface->flags |= location;
6253
6254         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6255             surface_evict_sysmem(surface);
6256     }
6257
6258     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6259             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6260     {
6261         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6262     }
6263
6264     return WINED3D_OK;
6265 }
6266
6267 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6268 {
6269     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6270
6271     /* Not on a swapchain - must be offscreen */
6272     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6273
6274     /* The front buffer is always onscreen */
6275     if (surface == swapchain->front_buffer) return FALSE;
6276
6277     /* If the swapchain is rendered to an FBO, the backbuffer is
6278      * offscreen, otherwise onscreen */
6279     return swapchain->render_to_fbo;
6280 }
6281
6282 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Counterpart of ffp_blit_alloc(); intentionally empty.
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6285
6286 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6287 /* Context activation is done by the caller. */
6288 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6289 {
6290     BYTE table[256][4];
6291     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) != 0;
6292     GLenum target;
6293
6294     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
6295         target = surface->container.u.texture->target;
6296     else
6297         target = surface->texture_target;
6298
6299     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6300
6301     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6302     GL_EXTCALL(glColorTableEXT(target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6303 }
6304
6305 /* Context activation is done by the caller. */
6306 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6307 {
6308     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6309     const struct wined3d_gl_info *gl_info = context->gl_info;
6310     GLenum target;
6311
6312     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
6313         target = surface->container.u.texture->target;
6314     else
6315         target = surface->texture_target;
6316
6317     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6318      * else the surface is converted in software at upload time in LoadLocation.
6319      */
6320     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6321             && gl_info->supported[EXT_PALETTED_TEXTURE])
6322         ffp_blit_p8_upload_palette(surface, gl_info);
6323
6324     gl_info->gl_ops.gl.p_glEnable(target);
6325     checkGLcall("glEnable(target)");
6326
6327     return WINED3D_OK;
6328 }
6329
6330 /* Context activation is done by the caller. */
6331 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6332 {
6333     gl_info->gl_ops.gl.p_glDisable(GL_TEXTURE_2D);
6334     checkGLcall("glDisable(GL_TEXTURE_2D)");
6335     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6336     {
6337         gl_info->gl_ops.gl.p_glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6338         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6339     }
6340     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6341     {
6342         gl_info->gl_ops.gl.p_glDisable(GL_TEXTURE_RECTANGLE_ARB);
6343         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6344     }
6345 }
6346
6347 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6348         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6349         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6350 {
6351     enum complex_fixup src_fixup;
6352
6353     switch (blit_op)
6354     {
6355         case WINED3D_BLIT_OP_COLOR_BLIT:
6356             if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
6357                 return FALSE;
6358
6359             src_fixup = get_complex_fixup(src_format->color_fixup);
6360             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6361             {
6362                 TRACE("Checking support for fixup:\n");
6363                 dump_color_fixup_desc(src_format->color_fixup);
6364             }
6365
6366             if (!is_identity_fixup(dst_format->color_fixup))
6367             {
6368                 TRACE("Destination fixups are not supported\n");
6369                 return FALSE;
6370             }
6371
6372             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6373             {
6374                 TRACE("P8 fixup supported\n");
6375                 return TRUE;
6376             }
6377
6378             /* We only support identity conversions. */
6379             if (is_identity_fixup(src_format->color_fixup))
6380             {
6381                 TRACE("[OK]\n");
6382                 return TRUE;
6383             }
6384
6385             TRACE("[FAILED]\n");
6386             return FALSE;
6387
6388         case WINED3D_BLIT_OP_COLOR_FILL:
6389             if (dst_pool == WINED3D_POOL_SYSTEM_MEM)
6390                 return FALSE;
6391
6392             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6393             {
6394                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6395                     return FALSE;
6396             }
6397             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6398             {
6399                 TRACE("Color fill not supported\n");
6400                 return FALSE;
6401             }
6402
6403             /* FIXME: We should reject color fills on formats with fixups,
6404              * but this would break P8 color fills for example. */
6405
6406             return TRUE;
6407
6408         case WINED3D_BLIT_OP_DEPTH_FILL:
6409             return TRUE;
6410
6411         default:
6412             TRACE("Unsupported blit_op=%d\n", blit_op);
6413             return FALSE;
6414     }
6415 }
6416
6417 /* Do not call while under the GL lock. */
6418 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6419         const RECT *dst_rect, const struct wined3d_color *color)
6420 {
6421     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6422     struct wined3d_fb_state fb = {&dst_surface, NULL};
6423
6424     device_clear_render_targets(device, 1, &fb, 1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6425
6426     return WINED3D_OK;
6427 }
6428
6429 /* Do not call while under the GL lock. */
6430 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6431         struct wined3d_surface *surface, const RECT *rect, float depth)
6432 {
6433     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6434     struct wined3d_fb_state fb = {NULL, surface};
6435
6436     device_clear_render_targets(device, 0, &fb, 1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6437
6438     return WINED3D_OK;
6439 }
6440
6441 const struct blit_shader ffp_blit =  {
6442     ffp_blit_alloc,
6443     ffp_blit_free,
6444     ffp_blit_set,
6445     ffp_blit_unset,
6446     ffp_blit_supported,
6447     ffp_blit_color_fill,
6448     ffp_blit_depth_fill,
6449 };
6450
6451 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6452 {
6453     return WINED3D_OK;
6454 }
6455
/* Counterpart of cpu_blit_alloc(); intentionally empty.
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6460
6461 /* Context activation is done by the caller. */
6462 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6463 {
6464     return WINED3D_OK;
6465 }
6466
/* Counterpart of cpu_blit_set(); intentionally empty.
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6471
6472 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6473         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
6474         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
6475 {
6476     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6477     {
6478         return TRUE;
6479     }
6480
6481     return FALSE;
6482 }
6483
6484 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6485         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6486         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6487 {
6488     UINT row_block_count;
6489     const BYTE *src_row;
6490     BYTE *dst_row;
6491     UINT x, y;
6492
6493     src_row = src_data;
6494     dst_row = dst_data;
6495
6496     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6497
6498     if (!flags)
6499     {
6500         for (y = 0; y < update_h; y += format->block_height)
6501         {
6502             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6503             src_row += src_pitch;
6504             dst_row += dst_pitch;
6505         }
6506
6507         return WINED3D_OK;
6508     }
6509
6510     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6511     {
6512         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6513
6514         switch (format->id)
6515         {
6516             case WINED3DFMT_DXT1:
6517                 for (y = 0; y < update_h; y += format->block_height)
6518                 {
6519                     struct block
6520                     {
6521                         WORD color[2];
6522                         BYTE control_row[4];
6523                     };
6524
6525                     const struct block *s = (const struct block *)src_row;
6526                     struct block *d = (struct block *)dst_row;
6527
6528                     for (x = 0; x < row_block_count; ++x)
6529                     {
6530                         d[x].color[0] = s[x].color[0];
6531                         d[x].color[1] = s[x].color[1];
6532                         d[x].control_row[0] = s[x].control_row[3];
6533                         d[x].control_row[1] = s[x].control_row[2];
6534                         d[x].control_row[2] = s[x].control_row[1];
6535                         d[x].control_row[3] = s[x].control_row[0];
6536                     }
6537                     src_row -= src_pitch;
6538                     dst_row += dst_pitch;
6539                 }
6540                 return WINED3D_OK;
6541
6542             case WINED3DFMT_DXT3:
6543                 for (y = 0; y < update_h; y += format->block_height)
6544                 {
6545                     struct block
6546                     {
6547                         WORD alpha_row[4];
6548                         WORD color[2];
6549                         BYTE control_row[4];
6550                     };
6551
6552                     const struct block *s = (const struct block *)src_row;
6553                     struct block *d = (struct block *)dst_row;
6554
6555                     for (x = 0; x < row_block_count; ++x)
6556                     {
6557                         d[x].alpha_row[0] = s[x].alpha_row[3];
6558                         d[x].alpha_row[1] = s[x].alpha_row[2];
6559                         d[x].alpha_row[2] = s[x].alpha_row[1];
6560                         d[x].alpha_row[3] = s[x].alpha_row[0];
6561                         d[x].color[0] = s[x].color[0];
6562                         d[x].color[1] = s[x].color[1];
6563                         d[x].control_row[0] = s[x].control_row[3];
6564                         d[x].control_row[1] = s[x].control_row[2];
6565                         d[x].control_row[2] = s[x].control_row[1];
6566                         d[x].control_row[3] = s[x].control_row[0];
6567                     }
6568                     src_row -= src_pitch;
6569                     dst_row += dst_pitch;
6570                 }
6571                 return WINED3D_OK;
6572
6573             default:
6574                 FIXME("Compressed flip not implemented for format %s.\n",
6575                         debug_d3dformat(format->id));
6576                 return E_NOTIMPL;
6577         }
6578     }
6579
6580     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6581             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6582
6583     return E_NOTIMPL;
6584 }
6585
6586 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6587         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6588         const WINEDDBLTFX *fx, enum wined3d_texture_filter_type filter)
6589 {
6590     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6591     const struct wined3d_format *src_format, *dst_format;
6592     struct wined3d_surface *orig_src = src_surface;
6593     struct wined3d_map_desc dst_map, src_map;
6594     const BYTE *sbase = NULL;
6595     HRESULT hr = WINED3D_OK;
6596     const BYTE *sbuf;
6597     BYTE *dbuf;
6598     int x, y;
6599
6600     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6601             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6602             flags, fx, debug_d3dtexturefiltertype(filter));
6603
6604     if (src_surface == dst_surface)
6605     {
6606         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6607         src_map = dst_map;
6608         src_format = dst_surface->resource.format;
6609         dst_format = src_format;
6610     }
6611     else
6612     {
6613         dst_format = dst_surface->resource.format;
6614         if (src_surface)
6615         {
6616             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6617             {
6618                 src_surface = surface_convert_format(src_surface, dst_format->id);
6619                 if (!src_surface)
6620                 {
6621                     /* The conv function writes a FIXME */
6622                     WARN("Cannot convert source surface format to dest format.\n");
6623                     goto release;
6624                 }
6625             }
6626             wined3d_surface_map(src_surface, &src_map, NULL, WINED3D_MAP_READONLY);
6627             src_format = src_surface->resource.format;
6628         }
6629         else
6630         {
6631             src_format = dst_format;
6632         }
6633
6634         wined3d_surface_map(dst_surface, &dst_map, dst_rect, 0);
6635     }
6636
6637     bpp = dst_surface->resource.format->byte_count;
6638     srcheight = src_rect->bottom - src_rect->top;
6639     srcwidth = src_rect->right - src_rect->left;
6640     dstheight = dst_rect->bottom - dst_rect->top;
6641     dstwidth = dst_rect->right - dst_rect->left;
6642     width = (dst_rect->right - dst_rect->left) * bpp;
6643
6644     if (src_surface)
6645         sbase = (BYTE *)src_map.data
6646                 + ((src_rect->top / src_format->block_height) * src_map.row_pitch)
6647                 + ((src_rect->left / src_format->block_width) * src_format->block_byte_count);
6648     if (src_surface != dst_surface)
6649         dbuf = dst_map.data;
6650     else
6651         dbuf = (BYTE *)dst_map.data
6652                 + ((dst_rect->top / dst_format->block_height) * dst_map.row_pitch)
6653                 + ((dst_rect->left / dst_format->block_width) * dst_format->block_byte_count);
6654
6655     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6656     {
6657         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6658
6659         if (src_surface == dst_surface)
6660         {
6661             FIXME("Only plain blits supported on compressed surfaces.\n");
6662             hr = E_NOTIMPL;
6663             goto release;
6664         }
6665
6666         if (srcheight != dstheight || srcwidth != dstwidth)
6667         {
6668             WARN("Stretching not supported on compressed surfaces.\n");
6669             hr = WINED3DERR_INVALIDCALL;
6670             goto release;
6671         }
6672
6673         if (!surface_check_block_align(src_surface, src_rect))
6674         {
6675             WARN("Source rectangle not block-aligned.\n");
6676             hr = WINED3DERR_INVALIDCALL;
6677             goto release;
6678         }
6679
6680         if (!surface_check_block_align(dst_surface, dst_rect))
6681         {
6682             WARN("Destination rectangle not block-aligned.\n");
6683             hr = WINED3DERR_INVALIDCALL;
6684             goto release;
6685         }
6686
6687         hr = surface_cpu_blt_compressed(sbase, dbuf,
6688                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6689                 src_format, flags, fx);
6690         goto release;
6691     }
6692
6693     /* First, all the 'source-less' blits */
6694     if (flags & WINEDDBLT_COLORFILL)
6695     {
6696         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6697         flags &= ~WINEDDBLT_COLORFILL;
6698     }
6699
6700     if (flags & WINEDDBLT_DEPTHFILL)
6701     {
6702         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6703     }
6704     if (flags & WINEDDBLT_ROP)
6705     {
6706         /* Catch some degenerate cases here. */
6707         switch (fx->dwROP)
6708         {
6709             case BLACKNESS:
6710                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6711                 break;
6712             case 0xaa0029: /* No-op */
6713                 break;
6714             case WHITENESS:
6715                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6716                 break;
6717             case SRCCOPY: /* Well, we do that below? */
6718                 break;
6719             default:
6720                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6721                 goto error;
6722         }
6723         flags &= ~WINEDDBLT_ROP;
6724     }
6725     if (flags & WINEDDBLT_DDROPS)
6726     {
6727         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6728     }
6729     /* Now the 'with source' blits. */
6730     if (src_surface)
6731     {
6732         int sx, xinc, sy, yinc;
6733
6734         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6735             goto release;
6736
6737         if (filter != WINED3D_TEXF_NONE && filter != WINED3D_TEXF_POINT
6738                 && (srcwidth != dstwidth || srcheight != dstheight))
6739         {
6740             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6741             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6742         }
6743
6744         xinc = (srcwidth << 16) / dstwidth;
6745         yinc = (srcheight << 16) / dstheight;
6746
6747         if (!flags)
6748         {
6749             /* No effects, we can cheat here. */
6750             if (dstwidth == srcwidth)
6751             {
6752                 if (dstheight == srcheight)
6753                 {
6754                     /* No stretching in either direction. This needs to be as
6755                      * fast as possible. */
6756                     sbuf = sbase;
6757
6758                     /* Check for overlapping surfaces. */
6759                     if (src_surface != dst_surface || dst_rect->top < src_rect->top
6760                             || dst_rect->right <= src_rect->left || src_rect->right <= dst_rect->left)
6761                     {
6762                         /* No overlap, or dst above src, so copy from top downwards. */
6763                         for (y = 0; y < dstheight; ++y)
6764                         {
6765                             memcpy(dbuf, sbuf, width);
6766                             sbuf += src_map.row_pitch;
6767                             dbuf += dst_map.row_pitch;
6768                         }
6769                     }
6770                     else if (dst_rect->top > src_rect->top)
6771                     {
6772                         /* Copy from bottom upwards. */
6773                         sbuf += src_map.row_pitch * dstheight;
6774                         dbuf += dst_map.row_pitch * dstheight;
6775                         for (y = 0; y < dstheight; ++y)
6776                         {
6777                             sbuf -= src_map.row_pitch;
6778                             dbuf -= dst_map.row_pitch;
6779                             memcpy(dbuf, sbuf, width);
6780                         }
6781                     }
6782                     else
6783                     {
6784                         /* Src and dst overlapping on the same line, use memmove. */
6785                         for (y = 0; y < dstheight; ++y)
6786                         {
6787                             memmove(dbuf, sbuf, width);
6788                             sbuf += src_map.row_pitch;
6789                             dbuf += dst_map.row_pitch;
6790                         }
6791                     }
6792                 }
6793                 else
6794                 {
6795                     /* Stretching in y direction only. */
6796                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6797                     {
6798                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6799                         memcpy(dbuf, sbuf, width);
6800                         dbuf += dst_map.row_pitch;
6801                     }
6802                 }
6803             }
6804             else
6805             {
6806                 /* Stretching in X direction. */
6807                 int last_sy = -1;
6808                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6809                 {
6810                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6811
6812                     if ((sy >> 16) == (last_sy >> 16))
6813                     {
6814                         /* This source row is the same as last source row -
6815                          * Copy the already stretched row. */
6816                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6817                     }
6818                     else
6819                     {
6820 #define STRETCH_ROW(type) \
6821 do { \
6822     const type *s = (const type *)sbuf; \
6823     type *d = (type *)dbuf; \
6824     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6825         d[x] = s[sx >> 16]; \
6826 } while(0)
6827
6828                         switch(bpp)
6829                         {
6830                             case 1:
6831                                 STRETCH_ROW(BYTE);
6832                                 break;
6833                             case 2:
6834                                 STRETCH_ROW(WORD);
6835                                 break;
6836                             case 4:
6837                                 STRETCH_ROW(DWORD);
6838                                 break;
6839                             case 3:
6840                             {
6841                                 const BYTE *s;
6842                                 BYTE *d = dbuf;
6843                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6844                                 {
6845                                     DWORD pixel;
6846
6847                                     s = sbuf + 3 * (sx >> 16);
6848                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6849                                     d[0] = (pixel      ) & 0xff;
6850                                     d[1] = (pixel >>  8) & 0xff;
6851                                     d[2] = (pixel >> 16) & 0xff;
6852                                     d += 3;
6853                                 }
6854                                 break;
6855                             }
6856                             default:
6857                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6858                                 hr = WINED3DERR_NOTAVAILABLE;
6859                                 goto error;
6860                         }
6861 #undef STRETCH_ROW
6862                     }
6863                     dbuf += dst_map.row_pitch;
6864                     last_sy = sy;
6865                 }
6866             }
6867         }
6868         else
6869         {
6870             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6871             DWORD keylow = 0xffffffff, keyhigh = 0, keymask = 0xffffffff;
6872             DWORD destkeylow = 0x0, destkeyhigh = 0xffffffff, destkeymask = 0xffffffff;
6873             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6874             {
6875                 /* The color keying flags are checked for correctness in ddraw */
6876                 if (flags & WINEDDBLT_KEYSRC)
6877                 {
6878                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6879                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6880                 }
6881                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6882                 {
6883                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6884                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6885                 }
6886
6887                 if (flags & WINEDDBLT_KEYDEST)
6888                 {
6889                     /* Destination color keys are taken from the source surface! */
6890                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6891                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6892                 }
6893                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6894                 {
6895                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6896                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6897                 }
6898
6899                 if (bpp == 1)
6900                 {
6901                     keymask = 0xff;
6902                 }
6903                 else
6904                 {
6905                     DWORD masks[3];
6906                     get_color_masks(src_format, masks);
6907                     keymask = masks[0]
6908                             | masks[1]
6909                             | masks[2];
6910                 }
6911                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6912             }
6913
6914             if (flags & WINEDDBLT_DDFX)
6915             {
6916                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6917                 LONG tmpxy;
6918                 dTopLeft     = dbuf;
6919                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6920                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6921                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6922
6923                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6924                 {
6925                     /* I don't think we need to do anything about this flag */
6926                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6927                 }
6928                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6929                 {
6930                     tmp          = dTopRight;
6931                     dTopRight    = dTopLeft;
6932                     dTopLeft     = tmp;
6933                     tmp          = dBottomRight;
6934                     dBottomRight = dBottomLeft;
6935                     dBottomLeft  = tmp;
6936                     dstxinc = dstxinc * -1;
6937                 }
6938                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6939                 {
6940                     tmp          = dTopLeft;
6941                     dTopLeft     = dBottomLeft;
6942                     dBottomLeft  = tmp;
6943                     tmp          = dTopRight;
6944                     dTopRight    = dBottomRight;
6945                     dBottomRight = tmp;
6946                     dstyinc = dstyinc * -1;
6947                 }
6948                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6949                 {
6950                     /* I don't think we need to do anything about this flag */
6951                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6952                 }
6953                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6954                 {
6955                     tmp          = dBottomRight;
6956                     dBottomRight = dTopLeft;
6957                     dTopLeft     = tmp;
6958                     tmp          = dBottomLeft;
6959                     dBottomLeft  = dTopRight;
6960                     dTopRight    = tmp;
6961                     dstxinc = dstxinc * -1;
6962                     dstyinc = dstyinc * -1;
6963                 }
6964                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6965                 {
6966                     tmp          = dTopLeft;
6967                     dTopLeft     = dBottomLeft;
6968                     dBottomLeft  = dBottomRight;
6969                     dBottomRight = dTopRight;
6970                     dTopRight    = tmp;
6971                     tmpxy   = dstxinc;
6972                     dstxinc = dstyinc;
6973                     dstyinc = tmpxy;
6974                     dstxinc = dstxinc * -1;
6975                 }
6976                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6977                 {
6978                     tmp          = dTopLeft;
6979                     dTopLeft     = dTopRight;
6980                     dTopRight    = dBottomRight;
6981                     dBottomRight = dBottomLeft;
6982                     dBottomLeft  = tmp;
6983                     tmpxy   = dstxinc;
6984                     dstxinc = dstyinc;
6985                     dstyinc = tmpxy;
6986                     dstyinc = dstyinc * -1;
6987                 }
6988                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6989                 {
6990                     /* I don't think we need to do anything about this flag */
6991                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6992                 }
6993                 dbuf = dTopLeft;
6994                 flags &= ~(WINEDDBLT_DDFX);
6995             }
6996
6997 #define COPY_COLORKEY_FX(type) \
6998 do { \
6999     const type *s; \
7000     type *d = (type *)dbuf, *dx, tmp; \
7001     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7002     { \
7003         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
7004         dx = d; \
7005         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7006         { \
7007             tmp = s[sx >> 16]; \
7008             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7009                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7010             { \
7011                 dx[0] = tmp; \
7012             } \
7013             dx = (type *)(((BYTE *)dx) + dstxinc); \
7014         } \
7015         d = (type *)(((BYTE *)d) + dstyinc); \
7016     } \
7017 } while(0)
7018
7019             switch (bpp)
7020             {
7021                 case 1:
7022                     COPY_COLORKEY_FX(BYTE);
7023                     break;
7024                 case 2:
7025                     COPY_COLORKEY_FX(WORD);
7026                     break;
7027                 case 4:
7028                     COPY_COLORKEY_FX(DWORD);
7029                     break;
7030                 case 3:
7031                 {
7032                     const BYTE *s;
7033                     BYTE *d = dbuf, *dx;
7034                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7035                     {
7036                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7037                         dx = d;
7038                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7039                         {
7040                             DWORD pixel, dpixel = 0;
7041                             s = sbuf + 3 * (sx>>16);
7042                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7043                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7044                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7045                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7046                             {
7047                                 dx[0] = (pixel      ) & 0xff;
7048                                 dx[1] = (pixel >>  8) & 0xff;
7049                                 dx[2] = (pixel >> 16) & 0xff;
7050                             }
7051                             dx += dstxinc;
7052                         }
7053                         d += dstyinc;
7054                     }
7055                     break;
7056                 }
7057                 default:
7058                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7059                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7060                     hr = WINED3DERR_NOTAVAILABLE;
7061                     goto error;
7062 #undef COPY_COLORKEY_FX
7063             }
7064         }
7065     }
7066
7067 error:
7068     if (flags && FIXME_ON(d3d_surface))
7069     {
7070         FIXME("\tUnsupported flags: %#x.\n", flags);
7071     }
7072
7073 release:
7074     wined3d_surface_unmap(dst_surface);
7075     if (src_surface && src_surface != dst_surface)
7076         wined3d_surface_unmap(src_surface);
7077     /* Release the converted surface, if any. */
7078     if (src_surface && src_surface != orig_src)
7079         wined3d_surface_decref(src_surface);
7080
7081     return hr;
7082 }
7083
7084 /* Do not call while under the GL lock. */
7085 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7086         const RECT *dst_rect, const struct wined3d_color *color)
7087 {
7088     static const RECT src_rect;
7089     WINEDDBLTFX BltFx;
7090
7091     memset(&BltFx, 0, sizeof(BltFx));
7092     BltFx.dwSize = sizeof(BltFx);
7093     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7094     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7095             WINEDDBLT_COLORFILL, &BltFx, WINED3D_TEXF_POINT);
7096 }
7097
7098 /* Do not call while under the GL lock. */
7099 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7100         struct wined3d_surface *surface, const RECT *rect, float depth)
7101 {
7102     FIXME("Depth filling not implemented by cpu_blit.\n");
7103     return WINED3DERR_INVALIDCALL;
7104 }
7105
7106 const struct blit_shader cpu_blit =  {
7107     cpu_blit_alloc,
7108     cpu_blit_free,
7109     cpu_blit_set,
7110     cpu_blit_unset,
7111     cpu_blit_supported,
7112     cpu_blit_color_fill,
7113     cpu_blit_depth_fill,
7114 };
7115
7116 static HRESULT surface_init(struct wined3d_surface *surface, UINT alignment, UINT width, UINT height,
7117         enum wined3d_multisample_type multisample_type, UINT multisample_quality,
7118         struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7119         enum wined3d_pool pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7120 {
7121     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7122     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7123     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7124     unsigned int resource_size;
7125     HRESULT hr;
7126
7127     if (multisample_quality > 0)
7128     {
7129         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7130         multisample_quality = 0;
7131     }
7132
7133     /* Quick lockable sanity check.
7134      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7135      * this function is too deep to need to care about things like this.
7136      * Levels need to be checked too, since they all affect what can be done. */
7137     switch (pool)
7138     {
7139         case WINED3D_POOL_SCRATCH:
7140             if (!lockable)
7141             {
7142                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7143                         "which are mutually exclusive, setting lockable to TRUE.\n");
7144                 lockable = TRUE;
7145             }
7146             break;
7147
7148         case WINED3D_POOL_SYSTEM_MEM:
7149             if (!lockable)
7150                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7151             break;
7152
7153         case WINED3D_POOL_MANAGED:
7154             if (usage & WINED3DUSAGE_DYNAMIC)
7155                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7156             break;
7157
7158         case WINED3D_POOL_DEFAULT:
7159             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7160                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7161             break;
7162
7163         default:
7164             FIXME("Unknown pool %#x.\n", pool);
7165             break;
7166     };
7167
7168     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3D_POOL_DEFAULT)
7169         FIXME("Trying to create a render target that isn't in the default pool.\n");
7170
7171     /* FIXME: Check that the format is supported by the device. */
7172
7173     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7174     if (!resource_size)
7175         return WINED3DERR_INVALIDCALL;
7176
7177     if (device->wined3d->flags & WINED3D_NO3D)
7178         surface->surface_ops = &gdi_surface_ops;
7179     else
7180         surface->surface_ops = &surface_ops;
7181
7182     hr = resource_init(&surface->resource, device, WINED3D_RTYPE_SURFACE, format,
7183             multisample_type, multisample_quality, usage, pool, width, height, 1,
7184             resource_size, parent, parent_ops, &surface_resource_ops);
7185     if (FAILED(hr))
7186     {
7187         WARN("Failed to initialize resource, returning %#x.\n", hr);
7188         return hr;
7189     }
7190
7191     /* "Standalone" surface. */
7192     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7193
7194     list_init(&surface->overlays);
7195
7196     /* Flags */
7197     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7198     if (flags & WINED3D_SURFACE_DISCARD)
7199         surface->flags |= SFLAG_DISCARD;
7200     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7201         surface->flags |= SFLAG_PIN_SYSMEM;
7202     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7203         surface->flags |= SFLAG_LOCKABLE;
7204     /* I'm not sure if this qualifies as a hack or as an optimization. It
7205      * seems reasonable to assume that lockable render targets will get
7206      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7207      * creation. However, the other reason we want to do this is that several
7208      * ddraw applications access surface memory while the surface isn't
7209      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7210      * future locks prevents these from crashing. */
7211     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7212         surface->flags |= SFLAG_DYNLOCK;
7213
7214     /* Mark the texture as dirty so that it gets loaded first time around. */
7215     surface_add_dirty_rect(surface, NULL);
7216     list_init(&surface->renderbuffers);
7217
7218     TRACE("surface %p, memory %p, size %u\n",
7219             surface, surface->resource.allocatedMemory, surface->resource.size);
7220
7221     /* Call the private setup routine */
7222     hr = surface->surface_ops->surface_private_setup(surface);
7223     if (FAILED(hr))
7224     {
7225         ERR("Private setup failed, returning %#x\n", hr);
7226         surface_cleanup(surface);
7227         return hr;
7228     }
7229
7230     /* Similar to lockable rendertargets above, creating the DIB section
7231      * during surface initialization prevents the sysmem pointer from changing
7232      * after a wined3d_surface_getdc() call. */
7233     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7234             && SUCCEEDED(surface_create_dib_section(surface)))
7235     {
7236         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7237         surface->resource.heapMemory = NULL;
7238         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7239     }
7240
7241     return hr;
7242 }
7243
7244 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7245         enum wined3d_format_id format_id, DWORD usage, enum wined3d_pool pool,
7246         enum wined3d_multisample_type multisample_type, DWORD multisample_quality, DWORD flags,
7247         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7248 {
7249     struct wined3d_surface *object;
7250     HRESULT hr;
7251
7252     TRACE("device %p, width %u, height %u, format %s\n",
7253             device, width, height, debug_d3dformat(format_id));
7254     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7255             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7256     TRACE("flags %#x, parent %p, parent_ops %p.\n", flags, parent, parent_ops);
7257
7258     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7259     if (!object)
7260         return WINED3DERR_OUTOFVIDEOMEMORY;
7261
7262     if (FAILED(hr = surface_init(object, device->surface_alignment, width, height, multisample_type,
7263             multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops)))
7264     {
7265         WARN("Failed to initialize surface, returning %#x.\n", hr);
7266         HeapFree(GetProcessHeap(), 0, object);
7267         return hr;
7268     }
7269
7270     TRACE("Created surface %p.\n", object);
7271     *surface = object;
7272
7273     return hr;
7274 }